# 

This script searches through a year folder of MERRA2 extratropical storm files (found here: https://portal.nccs.nasa.gov/datashare/Obs-ETC/Fronts-ETC/) and identifies ETCs whose path crosses within a specified grid box. Then, pressure drops are identified at the ETCs' center to identify bomb cyclones.

Filename convention: MERRA2fronts_YYYYMMDD_UT_Latstorm_Lonstorm_surfacetype_IDofTrack 

In [1]:
#import libraries
import glob
import numpy as np
import xarray as xr
from datetime import datetime, timedelta
import csv
import os
from shapely.geometry import LineString
import pickle

In [2]:
#functions
def is_within_grid(lat, lon, mask_data):
    # Find the closest index for the given latitude and longitude
    lat_idx = np.abs(mask_data.lat - lat).argmin()
    lon_idx = np.abs(mask_data.lon - lon).argmin()
    
    mask_value = mask_data.mask[lat_idx, lon_idx].values
    
    if np.isnan(mask_value):
        return False
    return mask_value == 0 or mask_value == 1

def is_line_within_mask(start_lat, start_lon, end_lat, end_lon, mask_data):
    line = LineString([(start_lon, start_lat), (end_lon, end_lat)])
    for point in line.coords:
        if is_within_grid(point[1], point[0], mask_data):
            return True
    return False

def find_pressure_drop_threshold(latitudes):
    """Calculate the pressure drop threshold."""
    return abs(24 * np.sin(np.radians(latitudes)) / np.sin(np.radians(60)))

# Function to create line between start and end coordinates
def create_line(start_lat, start_lon, end_lat, end_lon, num_points=100):
    latitudes = np.linspace(start_lat, end_lat, num_points)
    longitudes = np.linspace(start_lon, end_lon, num_points)
    return list(zip(latitudes, longitudes))

# AR concurrency
def check_AR_proximity(lat,lon, ds, timestep):
    # Filter the dataset for the northern and western hemispheres
    ds_nw = ds.sel(lat=slice(0, 90), lon=slice(-180, 0))

    # Box dimensions in kilometers
    north_box = 500
    south_box = 1500
    east_box = 1500
    west_box = 500

    # Convert kilometers to degrees approximately (assuming 1 degree ~ 111 km)
    km_to_deg = 1 / 111
    north_lat = lat + north_box * km_to_deg
    south_lat = lat - south_box * km_to_deg
    east_lon = lon + east_box * km_to_deg
    west_lon = lon - west_box * km_to_deg

    ds_box = ds_nw.sel(lat=slice(south_lat, north_lat), lon=slice(west_lon, east_lon))
    AR_ids = []
    for ds_lat in ds_box.lat.values:
        for ds_lon in ds_box.lon.values:
            kidmap = ds.kidmap.sel(time=timestep, lat=ds_lat, lon=ds_lon, method='nearest').values.item()
            if kidmap> 1:
                if(kidmap not in AR_ids):
                    AR_ids.append(kidmap)
    return AR_ids

In [3]:
def extract_pressure(file_path, lat, lon, x):
    """Extract the pressure at the closest x indices to the center of the storm"""
    data = xr.open_dataset(file_path, engine='netcdf4')
    latitudes = data['latitude'][:, 0]
    longitudes = data['longitude'][0, :]
    
    nearest_lat_idx = int(abs(latitudes - lat).argmin())
    nearest_lon_idx = int(abs(longitudes - lon).argmin())
    
    # Define the range of indices to consider around the nearest point
    lat_range = slice(max(0, nearest_lat_idx - x//2), min(len(latitudes), nearest_lat_idx + x//2 + 1))
    lon_range = slice(max(0, nearest_lon_idx - x//2), min(len(longitudes), nearest_lon_idx + x//2 + 1))
    
    pressures = data['MERRA2SLP'].isel(nb_latitude=lat_range, nb_longitude=lon_range).values
    #print(f"Extracted pressures around lat: {lat}, lon: {lon} -> {pressures.flatten()}")
    return pressures.flatten()  # Flatten the array to 1D for easier comparison

def findBombCyclones(filenames, mask_data, x, region, AR_dataset_path):
    ''' Filter filenames based on coordinates within the grid box and extract unique 
    storm IDs that pass through the grid box.
    '''
    unique_storm_ids = set()
    for filepath in filenames:
        filename = filepath.split("/")[-1]
        if filename.startswith("MERRA2fronts_"):
            parts = filename.split("_")
            lat, lon = float(parts[3]), float(parts[4])
            if is_within_grid(lat, lon, mask_data):
                unique_storm_ids.add(parts[-1].split('.')[0])

    ''' Check each timestep within the storm to see if the pressures around the storm's 
    center have a drop greater than the threshold based on any previous measurements available.
    '''
    pressure_drop_events = []
    AR_dataset = xr.open_dataset(AR_dataset_path)
    for storm_id in unique_storm_ids:
        storm_files = [fn for fn in filenames if storm_id in fn.split('/')[-1]]
        storm_files.sort()  # Sort files in chronological order

        greatest_drop_event = None  # Track the greatest drop for this storm
        prev_pressures = []
        prev_time = None
        start_lat, start_lon = None, None
        AR_concurrent_dict = {}
        for file_path in storm_files:
            try:
                parts = file_path.split("_")
                date_time = datetime.strptime(parts[1] + parts[2], "%Y%m%d%H")
                lat, lon = float(parts[3]), float(parts[4])

                AR_concurrent_timestep = check_AR_proximity(lat, lon, AR_dataset, date_time)
                if AR_concurrent_timestep:
                    AR_concurrent_dict[date_time] = AR_concurrent_timestep
                
                current_pressures = extract_pressure(file_path, lat, lon, x)

                if prev_pressures:
                    max_prev_pressure = max(prev_pressures)
                    min_current_pressure = min(current_pressures)
                    max_drop = max_prev_pressure - min_current_pressure
                    index_of_prev_pressure = prev_pressures.index(max_prev_pressure)
                    
                    threshold = find_pressure_drop_threshold(lat)

                    #this just checks for the greatest bomb, should we store all bombs that meet the threshold?
                    if max_drop >= threshold:
                        if greatest_drop_event is None or max_drop > greatest_drop_event['pressure_drop']:
                            start_parts = storm_files[0].split("_")
                            date_time_start = datetime.strptime(start_parts[1] + start_parts[2], "%Y%m%d%H")
                            latS, lonS = float(start_parts[3]), float(start_parts[4])
                            end_parts = storm_files[-1].split("_")
                            date_time_end = datetime.strptime(end_parts[1] + end_parts[2], "%Y%m%d%H")
                            latE, lonE = float(end_parts[3]), float(end_parts[4])
                            
                            current_file_index = storm_files.index(file_path)
                            bomb_start_file_index = current_file_index-(len(prev_pressures)-index_of_prev_pressure)
                            bomb_parts = storm_files[bomb_start_file_index].split("_")
                            bomb_date_time_start = datetime.strptime(start_parts[1] + start_parts[2], "%Y%m%d%H")
                            bomblatS, bomblonS = float(start_parts[3]), float(start_parts[4])
                            bomb_start_file_path_name = storm_files[bomb_start_file_index]

                            
                            greatest_drop_event = {
                                'storm_id': storm_id,
                                'storm_files': [os.path.basename(file_path) for file_path in storm_files],
                                'start_time': date_time_start,
                                'end_time': date_time_end,
                                'start_lat': latS,
                                'start_lon': lonS,
                                'end_lat': latE,
                                'end_lon': lonE,
                                
                                'bomb_start_time': bomb_date_time_start,
                                'bomb_end_time': date_time,
                                'bomb_start_file': os.path.basename(bomb_start_file_path_name),
                                'bomb_end_file': os.path.basename(file_path),
                                'pressure_drop': max_drop,
                                'bomb_start_lat': bomblatS,
                                'bomb_start_lon': bomblonS,
                                'bomb_end_lat': lat,
                                'bomb_end_lon': lon,
                                'bomb': True,

                                'region': ('northeast' if region == 'east' and lat >= 38 else
                                    'southeast' if region == 'east' and lat < 38 else
                                    'northwest' if region == 'west' and lat >= 42 else
                                    'southwest' if region == 'west' and lat < 42 else None)
                            }
                # Keep the list to the last 4 pressures + current
                prev_pressures.append(min(current_pressures))
                if len(prev_pressures) > 4:  # rolling window of the last 4 measurements
                    prev_pressures.pop(0)

                if prev_time is None:  # Save the start point of the first pressure drop
                    start_lat, start_lon = lat, lon

                prev_time = date_time
            
            except Exception as e:
                print(f"Error processing file {file_path}: {e}")
                continue  # Ignores corrupted netCDF files

        # Add the greatest drop event for this storm, if found, to the list
        if greatest_drop_event:
            #add AR concurrency dicts
            if AR_concurrent_dict:
                greatest_drop_event['AR_IDs'] = AR_concurrent_dict
                greatest_drop_event['AR_storm_concurrent'] = True
                #check if pressure drop point is AR concurrent
                greatest_drop_event['AR_bomb_concurrent'] = len(check_AR_proximity(greatest_drop_event['bomb_end_lat'], greatest_drop_event['bomb_end_lon'], AR_dataset, greatest_drop_event['bomb_end_time']))
            
            else:
                greatest_drop_event['AR_IDs'] = None
                greatest_drop_event['AR_storm_concurrent'] = False
                greatest_drop_event['AR_bomb_concurrent'] = 0
                
            if is_line_within_mask(greatest_drop_event['bomb_start_lat'], greatest_drop_event['bomb_start_lon'],
                                   greatest_drop_event['bomb_end_lat'], greatest_drop_event['bomb_end_lon'], mask_data):
                greatest_drop_event['bomb_in_mask'] = True
                pressure_drop_events.append(greatest_drop_event)
            else:
                if is_line_within_mask(greatest_drop_event['start_lat'], greatest_drop_event['start_lon'],
                                   greatest_drop_event['end_lat'], greatest_drop_event['end_lon'], mask_data):
                    greatest_drop_event['bomb_in_mask'] = False
                    pressure_drop_events.append(greatest_drop_event)
        else:
            start_parts = storm_files[0].split("_")
            date_time_start = datetime.strptime(start_parts[1] + start_parts[2], "%Y%m%d%H")
            latS, lonS = float(start_parts[3]), float(start_parts[4])
            end_parts = storm_files[-1].split("_")
            date_time_end = datetime.strptime(end_parts[1] + end_parts[2], "%Y%m%d%H")
            latE, lonE = float(end_parts[3]), float(end_parts[4])
            #check if storm path is in mask
            if is_line_within_mask(latS, lonS, latE, lonE, mask_data):
                non_bomb = {
                                    'storm_id': storm_id,
                                    'start_time': date_time_start,
                                    'end_time': date_time_end,
                                    'start_lat': latS,
                                    'start_lon': lonS,
                                    'end_lat': latE,
                                    'end_lon': lonE,
                                    'storm_files': [os.path.basename(file_path) for file_path in storm_files],
                    
                                    'bomb': False,
                                    'bomb_in_mask': None,
                                    'bomb_start_time': None,
                                    'bomb_end_time': None,
                                    'pressure_drop': None,
                                    'bomb_start_lat': None,
                                    'bomb_start_lon': None,
                                    'bomb_end_lat': None,
                                    'bomb_end_lon': None,
                                    'AR_bomb_concurrent': None,

                                    #maybe change in future to check quad for storm in region mask
                                    'region': ('northeast' if region == 'east' and latE >= 38 else
                                    'southeast' if region == 'east' and latE < 38 else
                                    'northwest' if region == 'west' and latE >= 42 else
                                    'southwest' if region == 'west' and latE < 42 else None)
                                }
                if AR_concurrent_dict:
                    non_bomb['AR_IDs'] = AR_concurrent_dict
                    non_bomb['AR_storm_concurrent'] = True
                else:
                    non_bomb['AR_IDs'] = None
                    non_bomb['AR_storm_concurrent'] = False
                pressure_drop_events.append(non_bomb)
            
            

    return pressure_drop_events

In [4]:
def extract_lat_lon_from_filename(filename):
    parts = filename.split('_')
    try:
        lat_str = parts[3]
        lon_str = parts[4]
        lat = float(lat_str)
        lon = float(lon_str)
        return lat, lon
    except (IndexError, ValueError) as e:
        raise ValueError(f"Latitude and longitude not found in filename: {filename}. Error: {e}")

In [5]:
# Configuration
region = 'west'
mask_file = f"/Users/lilydonaldson/Downloads/examples/util/masks/{region}coast_mask_landocean.nc"
mask_data = xr.open_dataset(mask_file)

year_start = 2010
year_end = 2010
num_gridboxes_to_check = 9 #x by x grid around center point

In [7]:
for year in range(year_start, year_end + 1):
    print(f"Processing year: {year}")
    folder_path = f"/Volumes/SSK Drive /merra2fronts/merra2fronts{year}/*/*"
    # Grab all of the NetCDF storm files for an entire year
    filenames = glob.glob(folder_path, recursive=True)
    print(f"# of files found for {year}")
    print(len(filenames))

    AR_dataset = f"/Users/lilydonaldson/Downloads/examples/data/AR_out/GISS_ARout5_WISO_20th_MERRA2_ANL_{year}.nc"
    
    pressure_drop_events = findBombCyclones(filenames, mask_data, num_gridboxes_to_check,region, AR_dataset)
    
    output_dir = "/Users/lilydonaldson/Downloads/examples/data/"
    f_path = folder_path.replace("/*/*","")
    output_path = os.path.join(output_dir, f'{region}_ETCs_{year}.pkl')
    with open(output_path, 'wb') as f:
        pickle.dump(pressure_drop_events, f)
    print(f"Saved {len(pressure_drop_events)} events for year {year} to {output_path}")
    
    print(f"ETCs identified within the Year {year} for {region}")
    print(f"Number of ETCs Identified {year}:")
    print(len(pressure_drop_events))
    # # Print the pressure drop events
    # for event in pressure_drop_events:
    #     rounded_drop = f"{event['pressure_drop']:.2f}"
    #     print(f"Storm ID: {event['storm_id']}, Start Time: {event['start_time'].strftime('%Y-%m-%d %H:%M:%S')}, End Time: {event['end_time'].strftime('%Y-%m-%d %H:%M:%S')}, \n     Pressure Drop: {rounded_drop} hPa")

Processing year: 2010
# of files found for 2010
58934
Saved 34 events for year 2010 to /Users/lilydonaldson/Downloads/examples/data/west_ETCs_2010.pkl
ETCs identified within the Year 2010 for west
Number of ETCs Identified 2010:
34


In [8]:
with open(f"/Users/lilydonaldson/Downloads/examples/data/west_ETCs_2010.pkl", 'rb') as f:
    BC = pickle.load(f)
print(len(BC))
print(BC[0])

34
{'storm_id': '20100608120510023850', 'start_time': datetime.datetime(2010, 6, 8, 12, 0), 'end_time': datetime.datetime(2010, 6, 11, 18, 0), 'start_lat': 39.0, 'start_lon': -121.5, 'end_lat': 57.0, 'end_lon': -123.0, 'storm_files': ['MERRA2fronts_20100608_12_39.00_-121.50_land_20100608120510023850.ncdf', 'MERRA2fronts_20100608_18_41.45_-121.79_land_20100608120510023850.ncdf', 'MERRA2fronts_20100609_00_44.77_-119.80_land_20100608120510023850.ncdf', 'MERRA2fronts_20100609_06_47.25_-117.90_land_20100608120510023850.ncdf', 'MERRA2fronts_20100609_12_47.62_-115.63_land_20100608120510023850.ncdf', 'MERRA2fronts_20100609_18_50.78_-119.25_land_20100608120510023850.ncdf', 'MERRA2fronts_20100610_00_52.34_-121.29_land_20100608120510023850.ncdf', 'MERRA2fronts_20100610_06_52.17_-121.38_land_20100608120510023850.ncdf', 'MERRA2fronts_20100610_12_51.90_-121.79_land_20100608120510023850.ncdf', 'MERRA2fronts_20100610_18_52.68_-121.26_land_20100608120510023850.ncdf', 'MERRA2fronts_20100611_00_54.67_-12