In [1]:
# =================================================================
# 03b_chronicle_rain_analysis_and_standardization
# Actions: 
# 1. Standardize area (Mollweide) and recalculate PFDI for consistency.
# 2. Perform Spatial-Temporal Rainfall Intensity Analysis (30m - 24h).
# Note: Includes a 'Clean Slate' step to remove polluted intensity columns.
# =================================================================

import pandas as pd
import numpy as np
import os
from tqdm import tqdm

# --- 1. CONFIGURATION ---
# Input pickle: the event table loaded below into `chronicle_events`.
RAIN_INPUT_PATH = r"D:\Development\RESEARCH\urban_flood_database\chronicle\imerg_rain_outputs\chronicle_urban_df_with_IMERG_FULL.pkl"
# Output pickle: where the merged master dataset is written at the end of the notebook.
RAIN_MASTER_FILE_PATH = r"D:\Development\RESEARCH\urban_flood_database\chronicle\imerg_rain_outputs\chronicle_rain_master.pkl"

# Durations in minutes for peak intensity calculation
# (each value is later divided by 30 to obtain a rolling-window length in time steps)
DURATIONS = [30, 60, 120, 240, 360, 720, 1440]

# --- 2. LOAD DATA ---
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
print(f"Loading chronicle rainfall data: {RAIN_INPUT_PATH}")
chronicle_events = pd.read_pickle(RAIN_INPUT_PATH)

Loading chronicle rainfall data: D:\Development\RESEARCH\urban_flood_database\chronicle\imerg_rain_outputs\chronicle_urban_df_with_IMERG_FULL.pkl


In [2]:
# Inspect the columns of the freshly loaded frame before it is modified by the next cell.
chronicle_events.columns

Index(['Unnamed: 0', 'uuid', 'area_km2', 'version', 'start_time', 'end_time',
       'duration_days', 'geometry_wkt', 'urban_built_up_area_m2',
       'polygon_total_area_m2', 'urban_percentage', 'event_id',
       'poly_area_km2', 'upa_max', 'upa_p95', 'upa_p99', 'PFDI_p95',
       'PFDI_p99', 'PFDI_max', 'imerg_matrix', 'imerg_mask', 'imerg_meta'],
      dtype='object')

In [3]:
# =================================================================
# STEP 0: CLEAN SLATE (PREVENT COLUMN POLLUTION)
# =================================================================
# Drop intensity columns left over from an earlier run so the merge at the end
# of this cell cannot collide with stale values (pandas would otherwise emit
# suffixed `_x` / `_y` duplicates).
old_intensity_cols = [c for c in chronicle_events.columns if 'max_rainfall_intens' in c]
if old_intensity_cols:
    print(f"Removing {len(old_intensity_cols)} existing intensity columns for a clean run...")
    chronicle_events = chronicle_events.drop(columns=old_intensity_cols)

# =================================================================
# ACTION 1: AREA STANDARDIZATION & MULTI-PFDI SYNC
# =================================================================
print("Step 1: Standardizing area and syncing PFDI metrics (p95, p99, max)...")

# Update master area to km2 using the precision Mollweide area.
# The source column is dropped further down, so on a re-run of this cell it is
# no longer present; in that case the already-converted `area_km2` is kept
# instead of failing with a KeyError.
if 'polygon_total_area_m2' in chronicle_events.columns:
    chronicle_events['area_km2'] = chronicle_events['polygon_total_area_m2'] / 1e6
elif 'area_km2' in chronicle_events.columns:
    print("  'polygon_total_area_m2' not found; keeping existing 'area_km2' "
          "(assuming it was standardized by an earlier run of this cell).")
else:
    raise KeyError("Neither 'polygon_total_area_m2' nor 'area_km2' is present in chronicle_events")

# Synchronizing all PFDI metrics with the new standardized area
metrics_to_sync = [
    ('upa_p95', 'PFDI_p95'),
    ('upa_p99', 'PFDI_p99'),
    ('upa_max', 'PFDI_max')
]

for upa_col, pfdi_col in metrics_to_sync:
    if upa_col in chronicle_events.columns:
        # Rows whose area is zero, negative or NaN get a PFDI of 0 rather than inf/NaN.
        chronicle_events[pfdi_col] = np.where(
            chronicle_events['area_km2'] > 0,
            chronicle_events[upa_col] / chronicle_events['area_km2'],
            0
        )

# Remove redundant columns to keep the dataset lean
redundant_cols = ['polygon_total_area_m2', 'poly_area_km2', 'Unnamed: 0', 'version']
chronicle_events = chronicle_events.drop(columns=[c for c in redundant_cols if c in chronicle_events.columns])

# =================================================================
# ACTION 2: INTENSITY ANALYSIS (Spatial then Temporal)
# =================================================================
# Time step (minutes) assumed between consecutive rows of `imerg_matrix`;
# every duration is converted to a number of steps with this value.
IMERG_STEP_MINUTES = 30


def _peak_intensities(rain_matrix, polygon_mask, durations, step_minutes=IMERG_STEP_MINUTES):
    """Return the peak rolling-mean rainfall of one event for each duration.

    The matrix is first averaged (NaN-aware) over the cells where
    ``polygon_mask == 1`` to get one value per time step, then for every
    duration the maximum of the rolling mean over that window is taken.

    Parameters
    ----------
    rain_matrix : np.ndarray
        Rainfall values with time on axis 0.
        Assumes the remaining axes match the shape of ``polygon_mask`` -- TODO confirm.
    polygon_mask : np.ndarray
        Cells equal to 1 are included in the spatial mean.
    durations : iterable of int
        Window lengths in minutes.
    step_minutes : int, default IMERG_STEP_MINUTES
        Minutes represented by one time step of ``rain_matrix``.

    Returns
    -------
    dict
        ``{"<duration>_max_rainfall_intens": peak}`` for every duration; the
        value is NaN when the series is shorter than the window.
    """
    # Spatial step: one mean value per time step across the masked cells
    hyetograph = pd.Series(np.nanmean(rain_matrix[:, polygon_mask == 1], axis=1))

    # Temporal step: rolling mean over each window, keep its maximum
    peaks = {}
    for duration_min in durations:
        window_steps = int(duration_min // step_minutes)
        column = f"{duration_min}_max_rainfall_intens"
        if len(hyetograph) >= window_steps:
            peaks[column] = hyetograph.rolling(window=window_steps).mean().max()
        else:
            peaks[column] = np.nan
    return peaks


print(f"Step 2: Calculating Peak Intensities for {len(chronicle_events)} events...")

event_intensity_list = []
skipped_events = 0

# Iterate only over the three columns that are needed; this avoids building a
# full row Series for every event as `iterrows()` does.
event_columns = zip(
    chronicle_events['event_id'],
    chronicle_events['imerg_matrix'],
    chronicle_events['imerg_mask'],
)

for event_id, rain_matrix, polygon_mask in tqdm(event_columns, total=len(chronicle_events)):
    has_rain = isinstance(rain_matrix, np.ndarray) and rain_matrix.size != 0
    # The mask is type-checked as well so a missing mask (None / NaN) skips the
    # event instead of raising AttributeError on `.sum()`.
    has_mask = isinstance(polygon_mask, np.ndarray) and polygon_mask.sum() != 0
    if not (has_rain and has_mask):
        skipped_events += 1
        continue

    peak_stats = {'event_id': event_id}
    peak_stats.update(_peak_intensities(rain_matrix, polygon_mask, DURATIONS))
    event_intensity_list.append(peak_stats)

if skipped_events:
    print(f"Skipped {skipped_events} events with a missing/empty rainfall matrix or mask "
          "(their intensity columns will be NaN).")

# =================================================================
# FINAL MERGE & SAVE
# =================================================================
# Passing the columns explicitly keeps the merge key and the intensity columns
# present even when every event was skipped.
intensity_cols = [f"{duration_min}_max_rainfall_intens" for duration_min in DURATIONS]
intensity_summary = pd.DataFrame(event_intensity_list, columns=['event_id'] + intensity_cols)

# `validate` makes pandas raise a MergeError if an event_id occurs more than once
# in the summary, instead of silently multiplying rows in the master dataset.
final_master_dataset = chronicle_events.merge(
    intensity_summary, on='event_id', how='left', validate='many_to_one'
)

print(f"\nProcessing Complete. Total events: {len(final_master_dataset)}")
final_master_dataset.to_pickle(RAIN_MASTER_FILE_PATH)
print(f"SUCCESS! Clean master dataset saved to: {RAIN_MASTER_FILE_PATH}")

Step 1: Standardizing area and syncing PFDI metrics (p95, p99, max)...
Step 2: Calculating Peak Intensities for 633339 events...


100%|████████████████████████████████████████████████████████████████████████| 633339/633339 [05:49<00:00, 1811.76it/s]



Processing Complete. Total events: 633339
SUCCESS! Clean master dataset saved to: D:\Development\RESEARCH\urban_flood_database\chronicle\imerg_rain_outputs\chronicle_rain_master.pkl
