# In [None]:
import pandas as pd
from datetime import datetime

def load_weather_warnings(file_path):
    """
    Read the weather-warnings CSV and parse its timestamp columns.

    All columns found in the file are returned; only 'Issue Time',
    'Valid From' and 'Valid To' are converted with ``pd.to_datetime``.
    """
    timestamp_fields = ('Issue Time', 'Valid From', 'Valid To')

    raw = pd.read_csv(file_path)

    # Replace each timestamp column in place (same position) with its parsed form.
    parsed = {name: pd.to_datetime(raw[name]) for name in timestamp_fields}
    return raw.assign(**parsed)

def consolidate_warnings(df):
    """
    Consolidate weather warnings by grouping re-issues of the same event.

    Rows that share 'Valid To', 'Warning Element', 'Warning Colour' and
    'Warning Text' are treated as one event and collapsed into a single row.
    The input DataFrame is not modified; a new DataFrame is returned.

    Parameters:
        df: DataFrame containing the base columns listed in ``base_columns``
            below. Every other column is carried through as a county column.

    Returns:
        DataFrame with one row per event, ordered by ``event_key``, holding
        ``event_key``, 'Issue Time', ``issue_count``, ``first_issue``,
        ``last_issue``, 'Valid From', 'Valid To', the renamed warning columns
        and the county columns.

    Raises:
        KeyError: if a base column is missing from ``df``.
    """
    base_columns = ['Issue Time', 'Valid From', 'Valid To', 'Warning Element',
                    'Warning Text', 'WhereToText', 'Warning Colour']
    # Everything that is not a base column is a county column. 'event_key' is
    # excluded so a frame that already carries one (e.g. from an earlier run)
    # can be consolidated again instead of colliding on reset_index().
    county_columns = [col for col in df.columns
                      if col not in base_columns and col != 'event_key']

    # Build the grouping key column-wise rather than with a row-wise apply:
    # it produces the same text per row, is much cheaper, and yields an empty
    # list (not an empty DataFrame) when df has no rows.
    event_keys = [
        f"{valid_to}_{element}_{colour}_{text}"
        for valid_to, element, colour, text in zip(
            df['Valid To'], df['Warning Element'],
            df['Warning Colour'], df['Warning Text'],
        )
    ]
    # assign() returns a new frame, so the caller's DataFrame never gains
    # (or has overwritten) an 'event_key' column.
    keyed = df.assign(event_key=event_keys)

    # Flat aggregation: one representative value per column for each event.
    # NOTE: pandas' 'first' takes the first non-null value of each column
    # within the group, so values may come from different rows if nulls exist.
    agg_dict = {col: 'first' for col in base_columns + county_columns}

    grouped = keyed.groupby('event_key')
    df_consolidated = grouped.agg(agg_dict)

    # Re-issue metrics share the event_key index, so plain column assignment
    # aligns them without separate merges.
    df_consolidated['issue_count'] = grouped.size()
    df_consolidated['first_issue'] = grouped['Issue Time'].min()
    df_consolidated['last_issue'] = grouped['Issue Time'].max()
    df_consolidated = df_consolidated.reset_index()

    # Rename columns to match desired output
    rename_dict = {
        'Warning Element': 'warning_type',
        'Warning Text': 'warning_text',
        'WhereToText': 'location',
        'Warning Colour': 'warning_colour',
    }
    df_consolidated = df_consolidated.rename(columns=rename_dict)

    # Arrange columns in the desired order
    column_order = [
        'event_key',
        'Issue Time',
        'issue_count',
        'first_issue',
        'last_issue',
        'Valid From',
        'Valid To',
        'warning_type',
        'warning_text',
        'location',
        'warning_colour',
    ] + county_columns

    # Only select columns that exist
    final_columns = [col for col in column_order if col in df_consolidated.columns]
    return df_consolidated[final_columns]

def analyze_warnings(df):
    """
    Generate summary statistics about consolidated weather warnings.

    Parameters:
        df: DataFrame with one row per event and the columns 'event_key',
            'warning_type', 'location' and 'issue_count' (the columns read
            below).

    Returns:
        dict with the keys:
            total_warnings: total number of warnings issued, i.e. the sum of
                'issue_count' over all events.
            unique_events: number of distinct 'event_key' values.
            warning_types: mapping of warning type to number of events.
            most_reissued: 'warning_type', 'location' and 'issue_count' of the
                event with the largest 'issue_count'; for an empty frame the
                same keys with None / None / 0.
            avg_issues_per_event: mean of 'issue_count' (NaN for an empty
                frame).
    """
    if df.empty:
        # Nothing to rank: keep the same keys so callers can still read them.
        most_reissued = {'warning_type': None, 'location': None, 'issue_count': 0}
    else:
        most_reissued = (
            df.nlargest(1, 'issue_count')[['warning_type', 'location', 'issue_count']]
            .to_dict('records')[0]
        )

    return {
        # Each row is already one consolidated event, so the number of
        # warnings issued is the sum of the per-event counts, not the row count.
        'total_warnings': int(df['issue_count'].sum()),
        'unique_events': df['event_key'].nunique(),
        'warning_types': df['warning_type'].value_counts().to_dict(),
        'most_reissued': most_reissued,
        'avg_issues_per_event': df['issue_count'].mean(),
    }

def process_weather_warnings(input_file, output_file):
    """
    Run the whole pipeline: load, consolidate, summarise, export.

    Loads ``input_file`` via ``load_weather_warnings``, consolidates it with
    ``consolidate_warnings``, prints the statistics produced by
    ``analyze_warnings``, writes the consolidated frame to ``output_file`` as
    CSV (without the index) and returns the consolidated DataFrame.
    """
    # Stage 1: read the raw warnings.
    print(f"Loading data from {input_file}...")
    raw_warnings = load_weather_warnings(input_file)

    # Stage 2: collapse re-issues into events.
    print("Consolidating warnings...")
    consolidated = consolidate_warnings(raw_warnings)

    # Stage 3: compute and report the summary.
    print("Analyzing results...")
    summary = analyze_warnings(consolidated)

    print("\nWeather Warnings Analysis Summary:")
    print(f"Total warnings issued: {summary['total_warnings']}")
    print(f"Number of unique events: {summary['unique_events']}")

    print("\nWarning types frequency:")
    type_counts = summary['warning_types']
    for kind in type_counts:
        print(f"- {kind}: {type_counts[kind]}")

    print("\nMost reissued warning:")
    top_event = summary['most_reissued']
    print(f"- Type: {top_event['warning_type']}")
    print(f"- Location: {top_event['location']}")
    print(f"- Times issued: {top_event['issue_count']}")

    print(f"\nAverage issues per event: {summary['avg_issues_per_event']:.2f}")

    # Stage 4: persist the consolidated table.
    print(f"\nExporting consolidated data to {output_file}...")
    consolidated.to_csv(output_file, index=False)
    print("Export complete!")

    return consolidated

from pathlib import Path
# Driver for this run: hard-coded locations of the input CSV and of the
# consolidated CSV that process_weather_warnings() will write.
input_file = "/mnt/hgfs/shared/ul_project_Msc_AI/data/met_eireann/eda/output.csv"
output_file = "/mnt/hgfs/shared/ul_project_Msc_AI/data/met_eireann/eda/consolidated_weather_warnings.csv"
# Executes the full pipeline at module level (reads input_file, prints the
# summary, writes output_file) and keeps the consolidated DataFrame in a
# module-level name.
df_xml_consolidated = process_weather_warnings(input_file, output_file)