In [18]:
import pandas as pd
from datetime import datetime, timedelta
import os
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import pyart
from math import radians, cos
import osmnx as ox
from datetime import datetime, date
import matplotlib.cm as cm
import matplotlib.colors as mcolors


Plot the locations of storm reports (hail and wind) on top of (1) the concurrent radar reflectivity and (2) MESH.

In [45]:
def add_gridlines(ax, extent, spacing_lat=None, spacing_lon=None):
    """Add dashed, labelled gridlines to ``ax`` at evenly spaced positions.

    Parameters
    ----------
    ax : axes object exposing ``gridlines(...)``
        Presumably a cartopy GeoAxes; only its ``gridlines`` method is used.
    extent : sequence of four numbers
        ``(lon_min, lon_max, lat_min, lat_max)``.
    spacing_lat, spacing_lon : float, optional
        Distance between neighbouring grid lines. When omitted, one fifth
        of the corresponding span is used, but never less than 0.01.

    Returns
    -------
    None
    """
    import matplotlib.ticker as mticker

    west, east, south, north = extent

    # Fall back to "five intervals across the span", floored at 0.01 so a
    # degenerate extent still yields a positive step.
    lat_step = max((north - south) / 5, 0.01) if spacing_lat is None else spacing_lat
    lon_step = max((east - west) / 5, 0.01) if spacing_lon is None else spacing_lon

    grid = ax.gridlines(
        draw_labels=True,
        linewidth=0.5,
        color='gray',
        alpha=0.5,
        linestyle='--',
    )

    # Label only the left and bottom edges.
    grid.top_labels = False
    grid.right_labels = False

    # The stop value is pushed one step past the upper bound so the bound
    # itself is included in the tick positions.
    lon_ticks = np.arange(west, east + lon_step, lon_step)
    lat_ticks = np.arange(south, north + lat_step, lat_step)
    grid.xlocator = mticker.FixedLocator(lon_ticks)
    grid.ylocator = mticker.FixedLocator(lat_ticks)

    grid.xlabel_style = {'size': 8}
    grid.ylabel_style = {'size': 8}


def parse_report_time(timestr, event_date=date(2024, 8, 17), cutoff_hour=12):
    """Turn an HHMM report time into a full datetime for a two-day event.

    Times at or after ``cutoff_hour`` are placed on ``event_date``; earlier
    times are placed on the following calendar day.

    Parameters
    ----------
    timestr : str, int or float
        Time of day written as HHMM. Leading zeros may be missing
        (``30`` means 00:30) and a trailing fractional part such as
        ``"1830.0"`` is ignored.
    event_date : datetime.date, optional
        First day of the event. Defaults to 2024-08-17.
    cutoff_hour : int, optional
        Hour of day (0-23) that separates the two days. Defaults to 12 (noon).

    Returns
    -------
    datetime.datetime
        Naive datetime combining the assigned date with the parsed time.

    Raises
    ------
    ValueError
        If ``timestr`` is empty or is not a valid HHMM time.
    """
    text = str(timestr).strip()
    # Values that passed through a float column look like "1830.0";
    # keep only the part before the decimal point.
    text = text.split('.', 1)[0]
    if not text.isdigit():
        # Without this check an empty string would be zero-padded to "0000"
        # and silently accepted as midnight.
        raise ValueError(f"invalid HHMM time: {timestr!r}")

    time_of_day = datetime.strptime(text.zfill(4), "%H%M").time()

    if time_of_day.hour >= cutoff_hour:
        assigned_date = event_date
    else:
        assigned_date = event_date + timedelta(days=1)

    return datetime.combine(assigned_date, time_of_day)

def find_closest_radar_file(report_dt, radar_files, max_gap_seconds=1800):
    """Return the radar file whose timestamp is nearest to ``report_dt``.

    The scan time is read from characters 4-16 of each file's basename,
    which must look like ``YYYYMMDD_HHMM`` (anything after the minutes is
    ignored, so matching is done at minute resolution).

    Parameters
    ----------
    report_dt : datetime.datetime
        Time of the storm report.
    radar_files : sequence of str
        Candidate file paths. Files whose names do not contain a parsable
        timestamp at the expected position are skipped.
    max_gap_seconds : float, optional
        Largest accepted time difference between report and scan.
        Defaults to 1800 (30 minutes).

    Returns
    -------
    str or None
        Path of the closest file, or ``None`` when there is no usable
        candidate or the closest one is more than ``max_gap_seconds`` away.
        On ties the file that comes first in ``radar_files`` wins.
    """
    candidates = []
    for path in radar_files:
        name = os.path.basename(path)
        try:
            scan_time = datetime.strptime(name[4:17], '%Y%m%d_%H%M')
        except ValueError:
            # One stray file should not abort the whole lookup.
            continue
        candidates.append((abs(report_dt - scan_time), path))

    if not candidates:
        return None

    # min() keeps the first of several equal minima, preserving list order.
    gap, closest_path = min(candidates, key=lambda item: item[0])
    if gap.total_seconds() > max_gap_seconds:
        return None

    return closest_path


def domain_size_km(lat_min, lat_max, lon_min, lon_max):
    """Return the larger side of a lat/lon box, approximated in kilometres.

    One degree is taken as 111 km; the east-west span is additionally
    scaled by the cosine of the box's mid latitude.
    """
    km_per_degree = 111
    mid_lat = (lat_min + lat_max) / 2

    north_south_km = (lat_max - lat_min) * km_per_degree
    east_west_km = (lon_max - lon_min) * km_per_degree * cos(radians(mid_lat))

    return max(north_south_km, east_west_km)

def download_osm_features(lat_min, lat_max, lon_min, lon_max):
    """Fetch road, water and place layers from OpenStreetMap for a lat/lon box.

    The amount of detail requested shrinks as the box grows: the box size
    from ``domain_size_km`` is compared with per-layer thresholds (in km)
    and the first threshold that is not exceeded decides which OSM tag
    values are queried. If every threshold is exceeded, the last (coarsest)
    entry of the table is used.

    Each layer is downloaded independently. If a download or the geometry
    filtering raises, a warning is printed and that layer is returned as
    ``None`` so the remaining layers are still attempted.

    Returns
    -------
    tuple
        ``(roads, water, cities)`` -- one filtered result per layer, or
        ``None`` for a layer that failed. Roads keep line geometries, water
        keeps polygon and line geometries, cities keep point geometries.
    """
    size_km = domain_size_km(lat_min, lat_max, lon_min, lon_max)
    print('Size: ', size_km, 'km')

    # (upper size limit in km, tag values) -- ordered from most to least detail.
    road_detail_levels = [
        (5, ["motorway", "trunk", "primary", "secondary", "tertiary", "unclassified", "residential", "service", "track", "path"]),
        (20, ["motorway", "trunk", "primary", "secondary", "tertiary", "unclassified", "residential"]),
        (50, ["motorway", "trunk", "primary", "secondary"]),
        (200, ["motorway", "trunk", "primary"]),
        (500, ["motorway", "trunk"]),
        (float('inf'), ["motorway"]),
    ]
    city_detail_levels = [
        (50, ["city", "town", "village", "hamlet"]),
        (100, ["city", "town", "village"]),
        (150, ["city", "town"]),
        (500, ["city"]),
    ]
    water_detail_levels = [
        (5, {"natural": ["water"], "waterway": ["river", "stream", "canal", "drain", "ditch"]}),
        (20, {"natural": ["water"], "waterway": ["river", "stream", "canal"]}),
        (500, {"natural": ["water"], "waterway": ["river"]}),
        (float('inf'), {"natural": ["water"]}),
    ]

    def pick_level(levels):
        # First entry whose limit covers the box; otherwise the last entry.
        return next(
            (choice for limit, choice in levels if size_km <= limit),
            levels[-1][1],
        )

    highway_tags = pick_level(road_detail_levels)
    city_tags = pick_level(city_detail_levels)
    water_tags = pick_level(water_detail_levels)

    # The box is handed to osmnx in (lon_min, lat_min, lon_max, lat_max) order.
    bbox = (lon_min, lat_min, lon_max, lat_max)

    def fetch_layer(label, shown_tags, query_tags, geometry_types):
        # Download one layer and keep only the listed geometry types.
        print(f"Downloading {label} with tags:", shown_tags)
        try:
            layer = ox.features.features_from_bbox(bbox, tags=query_tags)
            return layer[layer.geometry.type.isin(geometry_types)]
        except Exception as e:
            print(f"⚠️ Failed to download {label}: {e}")
            return None

    roads = fetch_layer(
        "roads", highway_tags, {"highway": highway_tags},
        ["LineString", "MultiLineString"],
    )
    water = fetch_layer(
        "water features", water_tags, water_tags,
        ["Polygon", "MultiPolygon", "LineString", "MultiLineString"],
    )
    cities = fetch_layer(
        "cities", city_tags, {"place": city_tags},
        ["Point"],
    )

    return roads, water, cities

def _sniff_delimiter(path, header_line=0):
    """Return a tab if line ``header_line`` (0-based) of ``path`` contains one, else a comma."""
    # A context manager closes the handle straight away instead of leaking it.
    with open(path) as fh:
        for _ in range(header_line):
            fh.readline()
        return '\t' if '\t' in fh.readline() else ','


def _load_reports_in_domain(hail_csv, wind_csv, domain_bounds):
    """Read both report files, tag rows as Hail/Wind and keep those inside the domain."""
    lat_min, lat_max, lon_min, lon_max = domain_bounds

    # NOTE(review): the hail file is read with its first line skipped,
    # presumably a title line above the header -- confirm against the data.
    # The delimiter is therefore detected on the second line (the header).
    hail = pd.read_csv(hail_csv, delimiter=_sniff_delimiter(hail_csv, header_line=1), skiprows=1)
    wind = pd.read_csv(wind_csv, delimiter=_sniff_delimiter(wind_csv))
    hail["reporttype"] = "Hail"
    wind["reporttype"] = "Wind"
    for frame in (hail, wind):
        frame.columns = [c.lower() for c in frame.columns]

    reports = pd.concat([hail, wind], ignore_index=True)
    # Non-numeric coordinates become NaN and are dropped below rather than
    # breaking the bounds comparison.
    for column in ("lat", "lon"):
        reports[column] = pd.to_numeric(reports[column], errors="coerce")
    reports = reports.dropna(subset=["lat", "lon", "time"])

    inside = (
        reports["lat"].between(lat_min, lat_max)
        & reports["lon"].between(lon_min, lon_max)
    )
    return reports[inside]


def _row_text(row, key, default):
    """Return ``row[key]`` as stripped text, or ``default`` if missing, NaN or blank."""
    value = row.get(key, default)
    if value is None or pd.isna(value):
        return default
    text = str(value).strip()
    return text or default


def _colorbar_label(radar, field):
    """Build a colorbar label from the metadata of ``radar.fields[field]``."""
    meta = radar.fields[field]
    name = meta.get('long_name') or meta.get('standard_name') or field
    units = meta.get('units')
    return f"{name} ({units})" if units else str(name)


def _draw_cities(ax, city_gdf, extent, domain_km):
    """Plot city markers and label the named cities that fall inside ``extent``."""
    if city_gdf is None or city_gdf.empty:
        return

    lon_lo, lon_hi, lat_lo, lat_hi = extent
    # Larger markers and text for small (<= 30 km) maps.
    zoomed_in = domain_km <= 30
    marker_size = 10 if zoomed_in else 4
    font_size = 7 if zoomed_in else 5

    ax.scatter(
        city_gdf.geometry.x,
        city_gdf.geometry.y,
        s=marker_size,
        color='black',
        transform=ccrs.PlateCarree(),
        zorder=4
    )

    # Label each city once, and only those within the plotted box.
    visible_cities = city_gdf.cx[lon_lo:lon_hi, lat_lo:lat_hi]
    for _, crow in visible_cities.iterrows():
        name = crow.get("name")
        if not isinstance(name, str) or not name:
            # Unnamed places come back as NaN, which is truthy.
            continue
        ax.text(
            crow.geometry.x, crow.geometry.y, name,
            transform=ccrs.PlateCarree(),
            fontsize=font_size,
            ha='left', va='bottom',
            color='black', zorder=5
        )


def plot_storm_reports_on_radar(
    hail_csv: str,
    wind_csv: str,
    data_dir: str,
    output_dir: str,
    domain_bounds: tuple,
    field: str = "reflectivity",
    sweep: int = 0,
    buffer_deg: float = 0.5,
    cmap: str = "pyart_NWSRef",
    vmin: float = -20,
    vmax: float = 75,
    dpi: int = 200
):
    """Save one radar map per storm report, centred on the report location.

    For every hail or wind report inside ``domain_bounds`` the radar file
    closest in time is plotted for a box of ``buffer_deg`` degrees around
    the report, with OpenStreetMap roads, water and cities overlaid and the
    report marked (green for hail, blue for wind).

    Parameters
    ----------
    hail_csv, wind_csv : str
        Tab- or comma-separated report files. Column names are matched
        case-insensitively; ``lat``, ``lon`` and ``time`` are required,
        ``location`` and ``state`` are used when present. The first line of
        the hail file is skipped before its header is read.
    data_dir : str
        Directory searched for radar files whose names end in ``_V06``.
    output_dir : str
        Directory for the PNG files; created if it does not exist.
    domain_bounds : tuple
        ``(lat_min, lat_max, lon_min, lon_max)``; reports outside are ignored.
    field, sweep : str, int
        Radar field and sweep passed to ``plot_ppi_map``.
    buffer_deg : float
        Half-width of each map, in degrees, around the report.
    cmap, vmin, vmax :
        Colour map and value range for the radar field and the colorbar.
    dpi : int
        Figure resolution.

    Returns
    -------
    None

    Notes
    -----
    Files are named ``<type>_<location>_<HHMM>.png``. When several reports
    in one run produce the same name, later ones get a ``_2``, ``_3``, ...
    suffix instead of overwriting the first.
    """
    os.makedirs(output_dir, exist_ok=True)

    all_reports = _load_reports_in_domain(hail_csv, wind_csv, domain_bounds)

    radar_files = sorted(
        os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith('_V06')
    )
    if not radar_files:
        # Nothing can be plotted; stop before the per-report lookup.
        print(f"⚠️ No radar files ending in '_V06' found in {data_dir}")
        return

    osm_cache = {}            # map box -> (roads, water, cities), avoids repeat downloads
    radar = None
    loaded_radar_file = None  # path of the volume currently held in `radar`
    name_counts = {}          # filename stem -> number of times used in this run
    n_plotted = 0

    for i, row in all_reports.iterrows():
        try:
            report_time = parse_report_time(str(row["time"]))
            lat = float(row["lat"])
            lon = float(row["lon"])
        except (ValueError, TypeError) as e:
            print(f"⚠️ Skipping row {i}: {e}")
            continue

        radar_file = find_closest_radar_file(report_time, radar_files)
        if radar_file is None:
            print(f"⚠️ No nearby radar file found for report at {report_time}")
            continue

        # Reports often share a radar volume; re-read only when it changes.
        if radar_file != loaded_radar_file:
            try:
                radar = pyart.io.read(radar_file)
            except Exception as e:
                print(f"⚠️ Could not read radar file {radar_file}: {e}")
                continue
            loaded_radar_file = radar_file

        if field not in radar.fields:
            print(f"⚠️ Field `{field}` not in radar file {radar_file}")
            continue

        lat_pad_min = lat - buffer_deg
        lat_pad_max = lat + buffer_deg
        lon_pad_min = lon - buffer_deg
        lon_pad_max = lon + buffer_deg
        extent = [lon_pad_min, lon_pad_max, lat_pad_min, lat_pad_max]

        box = (lat_pad_min, lat_pad_max, lon_pad_min, lon_pad_max)
        if box not in osm_cache:
            osm_cache[box] = download_osm_features(*box)
        road_gdf, water_gdf, city_gdf = osm_cache[box]
        domain_km = domain_size_km(*box)

        report_type = row["reporttype"]
        location = _row_text(row, 'location', None)
        state = _row_text(row, 'state', '')

        # Spaces were already turned into underscores here in the original
        # code, so its later "drop a leading number" step never fired; the
        # established file names are kept as they are.
        slug = (location or 'unknown').lower().replace(' ', '_').replace('/', '_')
        stem = f"{report_type.lower()}_{slug}_{report_time.strftime('%H%M')}"
        name_counts[stem] = name_counts.get(stem, 0) + 1
        if name_counts[stem] == 1:
            fname = f"{stem}.png"
        else:
            fname = f"{stem}_{name_counts[stem]}.png"

        fig = plt.figure(figsize=(6, 4), dpi=dpi)
        try:
            ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
            ax.set_extent(extent)

            ax.add_feature(cfeature.STATES.with_scale('10m'), linewidth=0.5)
            ax.add_feature(cfeature.BORDERS.with_scale('10m'), linewidth=0.5)
            ax.add_feature(cfeature.COASTLINE.with_scale('10m'), linewidth=0.5)

            if road_gdf is not None:
                road_gdf.plot(ax=ax, color='dimgray', linewidth=0.4, transform=ccrs.PlateCarree(), zorder=3)

            if water_gdf is not None:
                water_gdf.plot(ax=ax, color='dodgerblue', linewidth=0.6, alpha=0.6, transform=ccrs.PlateCarree(), zorder=2)

            _draw_cities(ax, city_gdf, extent, domain_km)

            disp = pyart.graph.RadarMapDisplay(radar)
            disp.plot_ppi_map(
                field, sweep, ax=ax,
                projection=ccrs.PlateCarree(),
                vmin=vmin, vmax=vmax, cmap=cmap,
                colorbar_flag=False,
                min_lat=lat_pad_min, max_lat=lat_pad_max,
                min_lon=lon_pad_min, max_lon=lon_pad_max,
                resolution='10m'
            )

            # Stand-alone colorbar so its size can be controlled.
            norm = mcolors.Normalize(vmin=vmin, vmax=vmax)
            mappable = cm.ScalarMappable(norm=norm, cmap=cmap)
            mappable.set_array([])  # Required to make the colorbar work

            cb = fig.colorbar(mappable, ax=ax, shrink=0.7, pad=0.02)
            # Label follows the plotted field instead of always saying reflectivity.
            cb.set_label(_colorbar_label(radar, field), fontsize=8)

            ax.scatter(
                lon, lat,
                s=36,  # size (6^2)
                facecolors="green" if report_type == "Hail" else "blue",
                edgecolors='black',
                marker="o",
                transform=ccrs.PlateCarree(),
                zorder=6
            )

            add_gridlines(ax, extent)

            label = location or 'report'
            if state:
                label = f"{label} ({state})"
            ax.set_title(f"{report_type} - {label} @ {report_time.strftime('%H:%M')} UTC", fontsize=10)

            fig.savefig(os.path.join(output_dir, fname), bbox_inches='tight', pad_inches=0.1)
            n_plotted += 1
        finally:
            # Release the figure even if plotting or saving raised.
            plt.close(fig)

    print(f"✅ Plotted {n_plotted} of {len(all_reports)} filtered reports with OSM overlays.")


In [None]:
# Reflectivity pass: field, sweep, colour map and value range are left at
# the function's defaults. The bounds tuple is ordered
# (lat_min, lat_max, lon_min, lon_max).
plot_storm_reports_on_radar(
    "240817_rpts_raw_hail.csv",
    "240817_rpts_filtered_wind.csv",
    "./radar_files",
    "./report_plots",
    (43.5, 46.5, -123, -120),
)

Size:  111.0 km
Downloading roads with tags: ['motorway', 'trunk', 'primary']
Downloading water features with tags: {'natural': ['water'], 'waterway': ['river']}
Downloading cities with tags: ['city', 'town']
Size:  111.0 km
Downloading roads with tags: ['motorway', 'trunk', 'primary']
Downloading water features with tags: {'natural': ['water'], 'waterway': ['river']}
Downloading cities with tags: ['city', 'town']
Size:  111.0 km
Downloading roads with tags: ['motorway', 'trunk', 'primary']
Downloading water features with tags: {'natural': ['water'], 'waterway': ['river']}
Downloading cities with tags: ['city', 'town']
Size:  111.0 km
Downloading roads with tags: ['motorway', 'trunk', 'primary']
Downloading water features with tags: {'natural': ['water'], 'waterway': ['river']}
Downloading cities with tags: ['city', 'town']
Size:  111.0 km
Downloading roads with tags: ['motorway', 'trunk', 'primary']
Downloading water features with tags: {'natural': ['water'], 'waterway': ['river']}
Do

In [None]:
# Velocity pass over the same reports and domain.
# Output goes to its own directory: the saved file names are built from
# report type, location and time only, so writing into ./report_plots would
# overwrite the reflectivity images produced by the previous cell.
plot_storm_reports_on_radar(
    hail_csv="240817_rpts_raw_hail.csv",
    wind_csv="240817_rpts_filtered_wind.csv",
    data_dir="./radar_files",
    output_dir="./report_plots_velocity",
    domain_bounds=(43.5, 46.5, -123, -120),
    field="velocity",
    sweep=1,
    cmap="pyart_NWSVel",
    vmin=-30,
    vmax=30,
)