#### Set styling for plotting

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import seaborn as sns
sns.set_palette('colorblind')
from matplotlib.pyplot import tight_layout
# ##SETTING PARAMS FOR MATPLOTLIB FIGURES
plt.rcParams.update({"figure.figsize": (6, 6),
                 "axes.facecolor": "white",
                 "axes.edgecolor": "black"})
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=sns.color_palette('colorblind'))
##set font size
font = {'family': 'sans-serif',
       'weight': 'normal',
       'size': 14}
plt.rc('font', **font)
# ##PANDAS PLOTTING
pd.plotting.register_matplotlib_converters()

#### Step 1: save environment file

In [3]:
!conda env export > xml_met_environment.yml

#### Step 2: import modules

In [4]:
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from tqdm import tqdm
import codecs
import csv

## INFORMATION

### XML - Met Eirean new system for collecting/ notifying of adverse weather events 


#### Step 3: import files

In [5]:
## the xml file sneed to be parsed before we can use import or use in pandas so seeting file locations
data_directory_xml = "/mnt/hgfs/shared/weather_warnings/archive_warnings/archive"
full_path_xml = os.path.abspath(data_directory_xml)


#### Step 4: process XML data 

In [6]:
def severity_to_color(severity):
    """Map severity levels to warning colors"""
    mapping = {
        'Extreme': 'Red',
        'Severe': 'Orange',
        'Moderate': 'Yellow'
    }
    return mapping.get(severity, 'notmapped')

def get_element_text(element, path, namespace):
    """Safely get text from an XML element"""
    found = element.find(path, namespace)
    return found.text if found is not None else None

def parse_xml(file_path_xml):
    """Parse an XML file and extract relevant information."""
    try:
        # Read the file content
        with codecs.open(file_path_xml, 'r', encoding='utf-8', errors='ignore') as file:
            xml_content = file.read()
        
        # Parse the XML content
        root = ET.fromstring(xml_content)
        namespace = {'cap': 'urn:oasis:names:tc:emergency:cap:1.2'}
        
        # Extract data from the info element first to check if we should process this warning
        info = root.find('cap:info', namespace)
        if info is None:
            return None
            
        # Check for advisory warnings (type 22) - exclude them
        parameters = info.findall('cap:parameter', namespace)
        for param in parameters:
            if get_element_text(param, 'cap:valueName', namespace) == 'awareness_type':
                awareness_type = get_element_text(param, 'cap:value', namespace)
                if awareness_type and '22' in awareness_type:
                    return None
                break
        
        # Check for county information
        area = info.find('cap:area', namespace)
        if area is None:
            return None
            
        geocodes = area.findall('cap:geocode', namespace)
        if not geocodes:
            return None

        # Initialize row with the old column format
        row = {
            'Issue Time': get_element_text(root, 'cap:sent', namespace),
            'Valid From': (get_element_text(info, 'cap:effective', namespace) or 
                         get_element_text(info, 'cap:onset', namespace)),
            'Valid To': get_element_text(info, 'cap:expires', namespace),
            'Warning Element': get_element_text(info, 'cap:event', namespace),
            'Warning Text': get_element_text(info, 'cap:description', namespace),
            'WhereToText': get_element_text(area, 'cap:areaDesc', namespace),
            'Warning Colour': severity_to_color(get_element_text(info, 'cap:severity', namespace))
        }
        
        # Initialize all county columns to 0
        county_info = {
            'EI01': 'Carlow', 'EI02': 'Cavan', 'EI03': 'Clare', 'EI04': 'Cork', 'EI32': 'Cork City',
            'EI06': 'Donegal', 'EI33': 'Dublin City', 'EI34': 'Dún Laoghaire-Rathdown', 'EI35': 'Fingal',
            'EI10': 'Galway', 'EI36': 'Galway City', 'EI11': 'Kerry', 'EI12': 'Kildare', 'EI13': 'Kilkenny',
            'EI15': 'Laois', 'EI14': 'Leitrim', 'EI42': 'Limerick', 'EI37': 'Limerick City', 'EI18': 'Longford',
            'EI19': 'Louth', 'EI20': 'Mayo', 'EI21': 'Meath', 'EI22': 'Monaghan', 'EI23': 'Offaly',
            'EI24': 'Roscommon', 'EI25': 'Sligo', 'EI39': 'South Dublin', 'EI43': 'Tipperary',
            'EI44': 'Waterford', 'EI29': 'Westmeath', 'EI30': 'Wexford', 'EI31': 'Wicklow'
        }
        
        # Initialize all counties to 0
        for county_name in county_info.values():
            row[county_name] = 0
            
        # Set affected counties to 1
        for gc in geocodes:
            if get_element_text(gc, 'cap:valueName', namespace) == 'FIPS':
                county_code = get_element_text(gc, 'cap:value', namespace)
                if county_code in county_info:
                    row[county_info[county_code]] = 1
        
        return row
    
    except ET.ParseError as e:
        return None
    except Exception as e:
        return None

def process_files(data_directory):
    """Process all XML files in the given directory and extract weather warning data."""
    file_pattern = os.path.join(data_directory, '*.xml')
    file_list = glob.glob(file_pattern)

    data = []
    error_files = []
    excluded_files = []

    for file in tqdm(file_list, desc="Processing files"):
        row = parse_xml(file)
        if row is not None:
            data.append(row)
        else:
            try:
                with codecs.open(file, 'r', encoding='utf-8', errors='ignore') as f:
                    ET.parse(f)
                excluded_files.append(file)
            except:
                error_files.append(file)

    # Create DataFrame
    df_xml = pd.DataFrame(data)

    # Convert date fields to datetime
    date_columns = ['Issue Time', 'Valid From', 'Valid To']
    for col in date_columns:
        if col in df_xml.columns:
            df_xml[col] = pd.to_datetime(df_xml[col], utc=True, errors='coerce')

    return df_xml, error_files, excluded_files

def save_error_files(file_list, filename):
    """Save list of error files to CSV"""
    pd.DataFrame({'file': file_list}).to_csv(filename, index=False)


df_xml, error_files, excluded_files = process_files(data_directory_xml)

# Save results
# df_xml.to_csv('weather_warnings.csv', index=False)
save_error_files(error_files, 'error_files.csv')
save_error_files(excluded_files, 'excluded_files.csv')

Processing files: 100%|█████████████████████████████████████████████████████| 10697/10697 [16:46<00:00, 10.63it/s]


In [7]:
# check the first few rows
print(df_xml.head())

# look at dataframe info
print(df_xml.info())

# check the df shape
print(f"Number of rows: {df_xml.shape[0]}")
print(f"Number of columns: {df_xml.shape[1]}")

print("XML Filtered Date Range:")
print(f"Start date: {df_xml['Issue Time'].min()}")
print(f"End date: {df_xml['Issue Time'].max()}")

                 Issue Time                Valid From  \
0 2018-03-29 19:46:16+00:00 2018-03-22 23:00:01+00:00   
1 2018-03-29 19:50:05+00:00 2018-03-29 19:00:01+00:00   
2 2018-03-29 21:56:16+00:00 2018-03-29 19:00:01+00:00   
3 2018-03-29 23:10:21+00:00 2018-03-29 19:00:01+00:00   
4 2018-03-30 04:15:33+00:00 2018-03-29 19:00:01+00:00   


0  Heavy showery rain continuing overnight with s...     Ireland   
1  Heavy showery rain overnight with some wintry ...     Ireland   
2  Heavy showery rain overnight with some wintry ...     Ireland   
3  Heavy showery rain overnight with some wintry ...     Ireland   
4  Heavy showery rain overnight with some wintry ...     Ireland   

0         Yellow       1      1      1  ...         1       1          1   
1         Yellow       1      1      1  ...         1       1          1   
2         Yellow       1      1      1  ...         1       1          1   
3         Yellow       1      1      1  ...         1       1          1   
4         Y

#### Step 5: Cleaning and processing data into a common format for combination

In [8]:
##list all columns (check does xml have provinces)
df_xml.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8040 entries, 0 to 8039
Data columns (total 39 columns):
 #   Column                  Non-Null Count  Dtype              
---  ------                  --------------  -----              
 0   Issue Time              8040 non-null   datetime64[ns, UTC]
 1   Valid From              8040 non-null   datetime64[ns, UTC]
 2   Valid To                8039 non-null   datetime64[ns, UTC]
 5   WhereToText             8040 non-null   object             
 7   Carlow                  8040 non-null   int64              
 8   Cavan                   8040 non-null   int64              
 9   Clare                   8040 non-null   int64              
 10  Cork                    8040 non-null   int64              
 11  Cork City               8040 non-null   int64              
 12  Donegal                 8040 non-null   int64              
 13  Dublin City             8040 non-null   int64              
 14  Dún Laoghaire-Rathdown  8040 non-null   int

##### data has no provinces or specifc ireland columns

##### Step 5.1 check for missing values and duplicates

In [9]:
total_missing = df_xml.isnull().sum().sum()
percent_missing = (total_missing / df_xml.size) * 100
print(f"Total missing values: {total_missing}")
print(f"Percentage of missing values: {percent_missing:.2f}%")


Total missing values: 1
Percentage of missing values: 0.00%


In [10]:
# All rows with any missing values
rows_with_missing = df_xml[df_xml.isnull().any(axis=1)]
rows_with_missing.head()

Unnamed: 0,Issue Time,Valid From,Valid To,Warning Element,Warning Text,WhereToText,Warning Colour,Carlow,Cavan,Clare,...,Monaghan,Offaly,Roscommon,Sligo,South Dublin,Tipperary,Waterford,Westmeath,Wexford,Wicklow
212,2017-10-16 11:44:33+00:00,2017-10-16 11:44:33+00:00,NaT,Severe Wind warning,Cyclonic variable Storm force 10 to Hurricane ...,Ireland,Orange,1,1,1,...,1,1,1,1,0,0,0,1,1,1


In [11]:
##look for duplicates
duplicate_rows = df_xml.duplicated()
duplicate_rows.sum()
df_xml[duplicate_rows]

Unnamed: 0,Issue Time,Valid From,Valid To,Warning Element,Warning Text,WhereToText,Warning Colour,Carlow,Cavan,Clare,...,Monaghan,Offaly,Roscommon,Sligo,South Dublin,Tipperary,Waterford,Westmeath,Wexford,Wicklow
163,2017-10-12 14:10:06+00:00,2017-10-12 14:10:06+00:00,2017-10-17 04:00:00+00:00,Moderate Advisory warning,A combination of a vigorous Atlantic weather s...,Ireland,Yellow,1,1,1,...,1,1,1,1,0,0,0,1,1,1
170,2017-10-13 08:44:24+00:00,2017-10-13 08:44:24+00:00,2017-10-17 04:00:00+00:00,Moderate Advisory warning,"On Monday, an Atlantic storm from the remnants...",Ireland,Yellow,1,1,1,...,1,1,1,1,0,0,0,1,1,1
405,2017-12-28 10:16:26+00:00,2017-12-28 10:00:01+00:00,2017-12-29 03:00:01+00:00,Moderate Snow-ice warning,Frost and icy conditions will develop again th...,"Leinster, Cavan, Monaghan and Donegal",Yellow,1,1,0,...,1,1,0,0,0,0,0,1,1,1
787,2018-02-09 06:03:39+00:00,2018-02-09 05:00:01+00:00,2018-02-09 12:00:01+00:00,Moderate Snow-ice warning,"Update\nThis morning, scattered snow showers m...",Ireland,Yellow,1,1,1,...,1,1,1,1,0,0,0,1,1,1
1187,2018-02-28 20:53:25+00:00,2018-02-28 20:00:01+00:00,2018-03-01 12:00:01+00:00,Extreme Snow-ice warning,Update\nFurther disruptive heavy snow showers ...,"Dublin, Kildare, Louth, Wexford, Wicklow, Meat...",Red,0,0,0,...,0,0,0,0,0,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5781,2021-02-08 17:19:36+00:00,2021-02-08 16:00:01+00:00,2021-02-09 18:00:01+00:00,Moderate Snow-ice warning,Snow accumulations of 2 to 5 cm in places.,"Dublin, Kildare, Louth, Wicklow, Meath and Mon...",Yellow,0,0,0,...,1,0,0,0,0,0,0,0,0,1
5783,2021-02-08 23:16:58+00:00,2021-02-08 16:00:01+00:00,2021-02-09 18:00:01+00:00,Moderate Snow-ice warning,Snow accumulations of 2 to 5 cm in places.,"Dublin, Kildare, Louth, Wicklow, Meath and Mon...",Yellow,0,0,0,...,1,0,0,0,0,0,0,0,0,1
5785,2021-02-08 23:18:07+00:00,2021-02-08 16:00:01+00:00,2021-02-09 18:00:01+00:00,Moderate Snow-ice warning,Snow accumulations of 2 to 5 cm in places.,"Dublin, Kildare, Louth, Wicklow, Meath and Mon...",Yellow,0,0,0,...,1,0,0,0,0,0,0,0,0,1
5787,2021-02-09 05:21:27+00:00,2021-02-08 16:00:01+00:00,2021-02-09 18:00:01+00:00,Moderate Snow-ice warning,Snow accumulations of 2 to 5 cm in places.,"Dublin, Kildare, Louth, Wicklow, Meath and Mon...",Yellow,0,0,0,...,1,0,0,0,0,0,0,0,0,1


##### Step 5.2 XML data has a lot of duplicates for the same event just issued multiple times, so this will consolidate into single events

ODS has 26 counties 
Clare                       int64
Cork                        int64
Kerry                       int64
Limerick                    int64
Tipperary                   int64
Waterford                   int64
Carlow                      int64
Dublin                      int64
Kildare                     int64
Kilkenny                    int64
Laois                       int64
Longford                    int64
Louth                       int64
Meath                       int64
Offaly                      int64
Westmeath                   int64
Wexford                     int64
Wicklow                     int64
Cavan                       int64
Donegal                     int64
Monaghan                    int64
Galway                      int64
Leitrim                     int64
Mayo                        int64
Roscommon                   int64
Sligo                       int64

XML has 
Carlow                  8040 non-null   int64              
Cavan                   8040 non-null   int64              
 9   Clare                   8040 non-null   int64              
 10  Cork                    8040 non-null   int64              
 11  Cork City               8040 non-null   int64              
 12  Donegal                 8040 non-null   int64              
 13  Dublin City             8040 non-null   int64              
 14  Dún Laoghaire-Rathdown  8040 non-null   int64              
 15  Fingal                  8040 non-null   int64              
 16  Galway                  8040 non-null   int64              
 17  Galway City             8040 non-null   int64              
 18  Kerry                   8040 non-null   int64              
 19  Kildare                 8040 non-null   int64              
 20  Kilkenny                8040 non-null   int64              
 21  Laois                   8040 non-null   int64              
 22  Leitrim                 8040 non-null   int64              
 23  Limerick                8040 non-null   int64              
 24  Limerick City           8040 non-null   int64              
 25  Longford                8040 non-null   int64              
 26  Louth                   8040 non-null   int64              
 27  Mayo                    8040 non-null   int64              
 28  Meath                   8040 non-null   int64              
 29  Monaghan                8040 non-null   int64              
 30  Offaly                  8040 non-null   int64              
 31  Roscommon               8040 non-null   int64              
 32  Sligo                   8040 non-null   int64              
 33  South Dublin            8040 non-null   int64              
 34  Tipperary               8040 non-null   int64              
 35  Waterford               8040 non-null   int64              
 36  Westmeath               8040 non-null   int64              
 37  Wexford                 8040 non-null   int64              
 38  Wicklow                 8040 non-null   int64  

 so 
 Dublin shoudl consist of : Dublin City, Dun Laoghaire/Rathdown, South Dublin, Fingal 
Limerick: Limerick + Limerick City
Cork : Cork and Cork City 
Galway : Galway and Galway city 

In [50]:
####duplictes could be due to the expanded list of location compared to the ods data and additionally multiple notificatiosn of the same event 
####consolidate the data into single events 

def identify_and_aggregate_unique_events(df):
    # First, convert datetime columns to date only for matching
    df_processed = df.copy()
    
    # Convert datetime columns to date more safely
    df_processed['Valid_From_Date'] = pd.to_datetime(df_processed['Valid From']).dt.date
    df_processed['Valid_To_Date'] = pd.to_datetime(df_processed['Valid To']).dt.date
    
    # Create a unique event identifier combining our matching criteria
    df_processed['event_id'] = (
        df_processed['Valid_From_Date'].astype(str) + '_' +
        df_processed['Valid_To_Date'].astype(str) + '_' +
        df_processed['Warning Colour'] + '_' +
        df_processed['Warning Element'] + '_' +
        df_processed['WhereToText']
    )
    
    # Now aggregate regions within each unique event
    def aggregate_regions_row(row):
        # Dublin aggregation
        row['Dublin'] = int(
            row['Dublin City'] | 
            row['Dún Laoghaire-Rathdown'] | 
            row['South Dublin'] | 
            row['Fingal']
        )
        
        # Limerick aggregation
        row['Limerick'] = int(row['Limerick'] | row['Limerick City'])
        
        # Cork aggregation
        row['Cork'] = int(row['Cork'] | row['Cork City'])
        
        # Galway aggregation
        row['Galway'] = int(row['Galway'] | row['Galway City'])
        
        return row
    
    # Apply aggregation
    df_processed = df_processed.apply(aggregate_regions_row, axis=1)
    
    # Drop the original region columns
    columns_to_drop = [
        'Dublin City', 'Dún Laoghaire-Rathdown', 'South Dublin', 'Fingal',
        'Limerick City', 'Cork City', 'Galway City',
        'Valid_From_Date', 'Valid_To_Date'  # Drop our temporary date columns
    ]
    
    df_processed = df_processed.drop(columns=columns_to_drop)
    
    # Keep only the first occurrence of each unique event
    df_unique = df_processed.drop_duplicates(subset=['event_id'])
    
    # Drop the event_id column, only needed for processing
    df_unique = df_unique.drop(columns=['event_id'])
    
    return df_unique

def verify_aggregation(original_df, aggregated_df):
    print(f"Original number of rows: {len(original_df)}")
    print(f"Aggregated number of rows: {len(aggregated_df)}")
    
    # Convert dates and create a temporary DataFrame for verification
    temp_df = original_df.copy()
    temp_df['Valid_From_Date'] = pd.to_datetime(temp_df['Valid From']).dt.date
    temp_df['Valid_To_Date'] = pd.to_datetime(temp_df['Valid To']).dt.date
    
    # Count unique events in original data
    original_events = temp_df.groupby([
        'Valid_From_Date',
        'Valid_To_Date',
        'Warning Colour',
        'Warning Element',
        'WhereToText'
    ]).size().reset_index()
    
    print(f"Number of unique events by criteria: {len(original_events)}")
    
    # Additional verification
    print("\nSample of unique events:")
    print(original_events.head())

# Apply aggregation and verify results
df_final = identify_and_aggregate_unique_events(df_xml)
verify_aggregation(df_xml, df_final)

Original number of rows: 8040
Aggregated number of rows: 1494
Number of unique events by criteria: 1493

Sample of unique events:

                                         WhereToText  0  
0                                            Ireland  2  
1  Connacht,Kilkenny,Donegal,Clare,Limerick,Tippe...  5  
2                                         Cork,Kerry  4  
3  Connacht,Kilkenny,Donegal,Clare,Limerick,Tippe...  2  
4                                            Ireland  3  


In [51]:
####check for duplicates again after processing 
duplicate_rows = df_final.duplicated()
duplicate_rows.sum()
df_final[duplicate_rows]

Unnamed: 0,Issue Time,Valid From,Valid To,Warning Element,Warning Text,WhereToText,Warning Colour,Carlow,Cavan,Clare,...,Monaghan,Offaly,Roscommon,Sligo,Tipperary,Waterford,Westmeath,Wexford,Wicklow,Dublin


##### Step 5.3 convert true/false to 0/1

In [53]:
# Identify all boolean columns in the DataFrame
bool_cols = df_final.select_dtypes(include=['bool']).columns

# Convert boolean columns to integers (True -> 1, False -> 0)
df_final[bool_cols] = df_final[bool_cols].astype(int)

# Verify the changes by displaying data types
print("Data types after conversion:")
print(df_final.dtypes)

Data types after conversion:
Issue Time         datetime64[ns, UTC]
Valid From         datetime64[ns, UTC]
Valid To           datetime64[ns, UTC]
WhereToText                     object
Carlow                           int64
Cavan                            int64
Clare                            int64
Cork                             int64
Donegal                          int64
Galway                           int64
Kerry                            int64
Kildare                          int64
Kilkenny                         int64
Laois                            int64
Leitrim                          int64
Limerick                         int64
Longford                         int64
Louth                            int64
Mayo                             int64
Meath                            int64
Monaghan                         int64
Offaly                           int64
Roscommon                        int64
Sligo                            int64
Tipperary                        in

##### Step 5.4 confirm date/time settings, check date ranges

In [54]:
##make sure datetime is the same in both dataframes
# Ensure datetime columns are consistently UTC
datetime_cols = ['Issue Time', 'Valid From', 'Valid To']
df_final[datetime_cols] = df_final[datetime_cols].apply(pd.to_datetime, utc=True)

##### Step 5.5 filter data for dates of interest 2013 to 2020 (full years of data)

In [55]:
print("XML Filtered Date Range:")
print(f"Start date: {df_final['Issue Time'].min()}")
print(f"End date: {df_final['Issue Time'].max()}")

XML Filtered Date Range:
Start date: 2017-09-25 04:34:48+00:00
End date: 2023-08-05 05:21:50+00:00


In [56]:
### Filter XML data from 2018 to end of 2022
### Although we have data in the ODS format from 2017 to 2020 using the years from both datasets 2018,2019,2020 can help validate my processing steps  
df_xml_filtered = df_final[
    (df_final['Issue Time'] >= '2018-01-01') & 
    (df_final['Issue Time'] <= '2023-08-04 23:59:59')
]

##### Step 5.5 drop unneeded columns 

In [57]:
##drop columns
columns_to_drop = ['WhereToText', 'Warning Text']
df_xml_filtered = df_xml_filtered.drop(columns=columns_to_drop)

In [58]:
####quick final check 
df_xml_filtered.head(10)

Unnamed: 0,Issue Time,Valid From,Valid To,Warning Element,Warning Colour,Carlow,Cavan,Clare,Cork,Donegal,...,Monaghan,Offaly,Roscommon,Sligo,Tipperary,Waterford,Westmeath,Wexford,Wicklow,Dublin
0,2018-03-29 19:46:16+00:00,2018-03-22 23:00:01+00:00,2018-03-23 09:00:01+00:00,Moderate Hail warning,Yellow,1,1,1,1,1,...,1,1,1,1,0,0,1,1,1,0
1,2018-03-29 19:50:05+00:00,2018-03-29 19:00:01+00:00,2018-03-30 09:00:01+00:00,Moderate Hail warning,Yellow,1,1,1,1,1,...,1,1,1,1,0,0,1,1,1,0
3,2018-03-29 23:10:21+00:00,2018-03-29 19:00:01+00:00,2018-03-30 09:00:01+00:00,Moderate Snow-ice warning,Yellow,1,1,1,1,1,...,1,1,1,1,0,0,1,1,1,0
5,2018-03-31 17:10:37+00:00,2018-03-31 17:00:01+00:00,2018-04-02 11:00:01+00:00,Moderate Rainfall warning,Yellow,0,0,0,1,0,...,0,0,0,0,0,0,0,1,1,0
15,2018-04-04 12:10:08+00:00,2018-03-31 17:00:01+00:00,2018-04-06 13:00:01+00:00,Moderate Rainfall warning,Yellow,1,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
17,2018-04-04 12:13:58+00:00,2018-04-04 12:00:01+00:00,2018-04-06 13:00:01+00:00,Moderate Rainfall warning,Yellow,1,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
19,2018-04-04 12:58:25+00:00,2018-04-04 12:00:01+00:00,2018-04-05 08:00:01+00:00,Moderate Low-Temperature warning,Yellow,1,1,1,1,1,...,1,1,1,1,0,0,1,1,1,0
37,2018-04-05 14:51:09+00:00,2018-04-05 14:43:16+00:00,2018-04-06 14:00:01+00:00,Moderate Rainfall warning,Yellow,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
38,2018-04-05 14:53:15+00:00,2018-04-05 14:00:01+00:00,2018-04-06 14:00:01+00:00,Moderate Rainfall warning,Yellow,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
60,2018-04-06 05:57:24+00:00,2018-04-06 05:00:01+00:00,2018-04-06 17:00:01+00:00,Moderate Wind warning,Yellow,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


##### Step 6 save filtered data to file 

In [59]:
df_xml_filtered.to_csv('/mnt/hgfs/shared/project_data/met_eireann/xml_warnings_2018_2023_08.csv', index=False)