# 

In [4]:
#import libraries
import glob
import numpy as np
import xarray as xr
from datetime import datetime, timedelta
import csv
import os
from shapely.geometry import LineString
import pickle
import pandas as pd
from geopy.distance import geodesic
from tqdm import tqdm

In [5]:
#functions
def is_within_grid(lat, lon, mask_data):
    """Report whether the mask cell nearest to (lat, lon) holds 0 or 1.

    The point is snapped to the closest entries of ``mask_data.lat`` and
    ``mask_data.lon``. No check is made that the point lies inside the
    grid's extent, so a distant point is judged by the nearest edge cell.

    Returns False when the selected cell is NaN; otherwise the result of
    "cell == 0 or cell == 1".
    """
    row = np.abs(mask_data.lat - lat).argmin()
    col = np.abs(mask_data.lon - lon).argmin()
    cell = mask_data.mask[row, col].values

    # NaN cells never count as inside the grid.
    return False if np.isnan(cell) else (cell == 0 or cell == 1)

def is_line_within_mask(start_lat, start_lon, end_lat, end_lon, mask_data, num_points=50):
    """Report whether any sampled point of a straight segment hits the mask.

    The segment from (start_lat, start_lon) to (end_lat, end_lon) is sampled
    at ``num_points`` evenly spaced positions (linear in lat/lon, endpoints
    included) and each one is tested with ``is_within_grid``.

    Note that a single hit is enough: the result is True when at least one
    sample is inside the mask, not when the whole segment is.

    Returns:
        True if any sample passes ``is_within_grid``, otherwise False
        (including when ``num_points`` is 0).
    """
    lats = np.linspace(start_lat, end_lat, num_points)
    lons = np.linspace(start_lon, end_lon, num_points)
    # any() stops at the first hit, skipping the remaining grid lookups.
    return any(is_within_grid(lat, lon, mask_data) for lat, lon in zip(lats, lons))

def find_pressure_drop_threshold(latitudes):
    """Return the latitude-scaled pressure drop threshold.

    Computes ``|24 * sin(latitude) / sin(60 degrees)|`` with ``latitudes``
    given in degrees; scalars and array-likes are both accepted.
    """
    reference = np.sin(np.radians(60))
    scaled = 24 * np.sin(np.radians(latitudes)) / reference
    return abs(scaled)

# Function to create line between start and end coordinates
def create_line(start_lat, start_lon, end_lat, end_lon, num_points=100):
    """Return ``num_points`` evenly spaced (lat, lon) tuples between two points.

    Spacing is linear in latitude and longitude and both endpoints are
    included.
    """
    lat_steps = np.linspace(start_lat, end_lat, num_points)
    lon_steps = np.linspace(start_lon, end_lon, num_points)
    return [(lat_step, lon_step) for lat_step, lon_step in zip(lat_steps, lon_steps)]

# AR concurrency
def check_AR_proximity(lat, lon, ds, timestep):
    """Return the IDs of every AR object found in a box around (lat, lon).

    The box extends 500 km north, 1500 km south, 1500 km east and 500 km
    west of the point. Kilometres are converted to degrees with a flat
    1 degree ~ 111 km factor, with no cos(latitude) correction applied to
    the longitude extent.

    Args:
        lat: Latitude of the centre point, in degrees.
        lon: Longitude of the centre point, in degrees. It is compared
            directly against ``ds.lon``.
            NOTE(review): assumes both use the same longitude convention
            (0..360 vs -180..180) - confirm against the AR catalogue.
        ds: Dataset exposing ``lat`` and ``lon`` coordinates and a
            ``kidmap`` variable that can be selected by ``time`` and
            indexed by ``lat`` / ``lon``.
        timestep: Time to look up; the nearest available time is used.

    Returns:
        Sorted list of the distinct ``kidmap`` values greater than 1 found
        inside the box, or an empty list when there are none.
    """
    # Box dimensions in kilometers
    north_box = 500
    south_box = 1500
    east_box = 1500
    west_box = 500

    # Convert kilometers to degrees approximately (assuming 1 degree ~ 111 km)
    km_to_deg = 1 / 111
    north_lat = lat + north_box * km_to_deg
    south_lat = lat - south_box * km_to_deg
    east_lon = lon + east_box * km_to_deg
    west_lon = lon - west_box * km_to_deg

    # Integer positions of the grid rows/columns that fall inside the box
    lat_inds = np.where((ds.lat >= south_lat) & (ds.lat <= north_lat))[0]
    lon_inds = np.where((ds.lon >= west_lon) & (ds.lon <= east_lon))[0]

    kidmap = ds['kidmap'].sel(time=timestep, method='nearest')

    # Pull the whole box in one selection instead of one lookup per cell.
    box = np.asarray(kidmap.isel(lat=lat_inds, lon=lon_inds).values)

    # Keep every distinct ID above 1 so overlapping ARs are all reported;
    # NaN cells compare False and drop out here.
    return np.unique(box[box > 1]).tolist()


In [6]:
def extract_pressure(data, lat, lon, x):
    """Collect sea-level pressures from rows near (lat, lon).

    For each of the 'Latitude' and 'Longitude' columns, the distinct values
    are taken in order of first appearance, the one closest to the target
    is located, and a window of up to ``x // 2`` entries on either side of
    it is kept. Rows whose latitude AND longitude both fall in their
    windows contribute their 'Sea_level_pressure'.

    NOTE(review): the window is positional within the list of distinct
    values, not a distance in degrees - confirm this matches the intended
    "x by x grid around the centre" when ``data`` is a single storm track.

    Returns:
        1-D array of the selected pressures.
    """
    half_width = x // 2

    def _window(column, target):
        # Distinct values of the column, then a slice centred on the nearest.
        distinct = data[column].unique()
        centre = int(np.abs(distinct - target).argmin())
        first = max(0, centre - half_width)
        last = min(len(distinct), centre + half_width + 1)
        return distinct[first:last]

    lat_window = _window('Latitude', lat)
    lon_window = _window('Longitude', lon)

    selected = data['Latitude'].isin(lat_window) & data['Longitude'].isin(lon_window)
    # Flatten to 1-D for easier comparison by the caller
    return data.loc[selected, 'Sea_level_pressure'].values.flatten()

def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the geodesic distance, in kilometers, between two lat/lon points."""
    origin = (lat1, lon1)
    destination = (lat2, lon2)
    separation = geodesic(origin, destination)
    return separation.kilometers

def _row_datetime(row):
    """Build a datetime from a track row's Year, Month, Day and Hour fields."""
    # Row access can hand back floats (pandas upcasts mixed-dtype rows) and
    # datetime() only accepts integers, so coerce each field explicitly.
    return datetime(int(row['Year']), int(row['Month']), int(row['Day']), int(row['Hour']))


def _classify_region(region, lat):
    """Map a coast ('east' or 'west') and a latitude to its north/south sub-region."""
    if region == 'east':
        if lat >= 38:
            return 'northeast'
        if lat < 38:
            return 'southeast'
    elif region == 'west':
        if lat >= 42:
            return 'northwest'
        if lat < 42:
            return 'southwest'
    # Unknown coast, or a latitude (e.g. NaN) that satisfies neither comparison.
    return None


def _track_summary(storm_id, storm_data):
    """Return the event fields describing a storm's whole track (first to last row)."""
    first_row = storm_data.iloc[0]
    last_row = storm_data.iloc[-1]
    return {
        'storm_id': storm_id,
        'start_time': _row_datetime(first_row),
        'end_time': _row_datetime(last_row),
        'start_lat': first_row['Latitude'],
        'start_lon': first_row['Longitude'],
        'end_lat': last_row['Latitude'],
        'end_lon': last_row['Longitude'],
    }


def findBombCyclones(data, mask_data, x, region, AR_dataset):
    """Identify storms touching the mask and flag those with a bomb-like pressure drop.

    For every unique storm ID the track is walked in chronological order.
    At each row the minimum pressure returned by ``extract_pressure`` is
    compared with the highest of the minima recorded for up to the 4
    preceding rows; if the drop reaches ``find_pressure_drop_threshold`` at
    the row's latitude it is a candidate, and the largest candidate per
    storm is kept.

    A storm is written to the output when a straight line samples the mask
    (see ``is_line_within_mask``):
      * bomb storms: the bomb segment (``bomb_in_mask`` True) or, failing
        that, the start-to-end line of the whole track (``bomb_in_mask``
        False);
      * other storms: the start-to-end line of the whole track.

    Args:
        data: DataFrame with columns 'USI', 'Year', 'Month', 'Day', 'Hour',
            'Latitude', 'Longitude' and 'Sea_level_pressure'.
        mask_data: Mask dataset forwarded to ``is_line_within_mask``.
        x: Window size forwarded to ``extract_pressure``.
        region: 'east' or 'west'; used to label each event's sub-region.
        AR_dataset: AR catalogue forwarded to ``check_AR_proximity``.

    Returns:
        List of event dicts, one per retained storm. Every dict carries the
        track fields, the bomb fields (None for non-bomb storms), 'region',
        'distance_between_timesteps' and the AR concurrency fields.
    """
    unique_storm_ids = data['USI'].unique()
    print(f"# ids: {len(unique_storm_ids)}")

    pressure_drop_events = []
    with tqdm(total=len(unique_storm_ids), desc="Processing Storms", unit="storm") as pbar:
        for storm_id in unique_storm_ids:
            storm_data = data[data['USI'] == storm_id]
            storm_data = storm_data.sort_values(by=['Year', 'Month', 'Day', 'Hour'])  # chronological order

            # An ID that matches no rows (e.g. a NaN ID) has no track to summarise.
            if storm_data.empty:
                pbar.update(1)
                continue

            track = _track_summary(storm_id, storm_data)

            greatest_drop_event = None  # Track the greatest drop for this storm
            # (minimum pressure, row position) for up to the 4 preceding rows.
            # NOTE(review): presumably 6-hourly rows, i.e. a 24 h window - confirm.
            recent_minima = []
            AR_concurrent_dict = {}
            distances = []
            prev_lat, prev_lon = None, None

            for position, (_, row) in enumerate(storm_data.iterrows()):
                try:
                    date_time = _row_datetime(row)
                    lat, lon = row['Latitude'], row['Longitude']

                    if prev_lat is not None and prev_lon is not None:
                        distances.append(calculate_distance(prev_lat, prev_lon, lat, lon))
                    prev_lat, prev_lon = lat, lon

                    # Per-timestep AR matching is currently disabled:
                    # AR_concurrent_timestep = check_AR_proximity(lat, lon, AR_dataset, date_time)
                    # if AR_concurrent_timestep:
                    #     AR_concurrent_dict[date_time] = AR_concurrent_timestep

                    current_pressures = extract_pressure(storm_data, lat, lon, x)

                    if recent_minima:
                        # max() returns the earliest entry on ties. Carrying the
                        # row position with each pressure keeps the bomb start
                        # correct even when an earlier row was skipped below.
                        max_prev_pressure, bomb_start_position = max(
                            recent_minima, key=lambda entry: entry[0]
                        )
                        max_drop = max_prev_pressure - min(current_pressures)
                        threshold = find_pressure_drop_threshold(lat)

                        # Only the greatest qualifying drop per storm is kept.
                        if max_drop >= threshold and (
                            greatest_drop_event is None
                            or max_drop > greatest_drop_event['pressure_drop']
                        ):
                            bomb_start_row = storm_data.iloc[bomb_start_position]
                            greatest_drop_event = {
                                **track,
                                'bomb_start_time': _row_datetime(bomb_start_row),
                                'bomb_end_time': date_time,
                                'pressure_drop': max_drop,
                                'bomb_start_lat': bomb_start_row['Latitude'],
                                'bomb_start_lon': bomb_start_row['Longitude'],
                                'bomb_end_lat': lat,
                                'bomb_end_lon': lon,
                                'bomb': True,
                                # Same list object that keeps growing below, so
                                # it ends up holding the whole track's steps.
                                'distance_between_timesteps': distances,
                                'region': _classify_region(region, lat),
                            }

                    # Rolling window of the last 4 measurements
                    recent_minima.append((min(current_pressures), position))
                    if len(recent_minima) > 4:
                        recent_minima.pop(0)

                except Exception as e:
                    # Best effort: report the bad row and carry on with the next one.
                    print(f"Error processing row: {e}")
                    continue

            pbar.update(1)

            if greatest_drop_event:
                # With per-timestep matching disabled above, AR_concurrent_dict
                # stays empty and the "not concurrent" branch is always taken.
                if AR_concurrent_dict:
                    greatest_drop_event['AR_IDs'] = AR_concurrent_dict
                    greatest_drop_event['AR_storm_concurrent'] = True
                    # Check if the pressure drop point itself is AR concurrent
                    greatest_drop_event['AR_bomb_concurrent'] = len(check_AR_proximity(
                        greatest_drop_event['bomb_end_lat'],
                        greatest_drop_event['bomb_end_lon'],
                        AR_dataset,
                        greatest_drop_event['bomb_end_time'],
                    ))
                else:
                    greatest_drop_event['AR_IDs'] = None
                    greatest_drop_event['AR_storm_concurrent'] = False
                    greatest_drop_event['AR_bomb_concurrent'] = 0

                if is_line_within_mask(greatest_drop_event['bomb_start_lat'], greatest_drop_event['bomb_start_lon'],
                                       greatest_drop_event['bomb_end_lat'], greatest_drop_event['bomb_end_lon'], mask_data):
                    greatest_drop_event['bomb_in_mask'] = True
                    pressure_drop_events.append(greatest_drop_event)
                elif is_line_within_mask(track['start_lat'], track['start_lon'],
                                         track['end_lat'], track['end_lon'], mask_data):
                    # The bomb happened outside the mask but the storm track crosses it.
                    greatest_drop_event['bomb_in_mask'] = False
                    pressure_drop_events.append(greatest_drop_event)

            # Non-bomb storm: keep it only if its start-to-end line samples the mask
            elif is_line_within_mask(track['start_lat'], track['start_lon'],
                                     track['end_lat'], track['end_lon'], mask_data):
                non_bomb = {
                    **track,
                    'bomb': False,
                    'bomb_in_mask': None,
                    'bomb_start_time': None,
                    'bomb_end_time': None,
                    'pressure_drop': None,
                    'bomb_start_lat': None,
                    'bomb_start_lon': None,
                    'bomb_end_lat': None,
                    'bomb_end_lon': None,
                    'AR_bomb_concurrent': None,
                    'distance_between_timesteps': distances,
                    # Maybe change in future to check quad for storm in region mask
                    'region': _classify_region(region, track['end_lat']),
                }
                if AR_concurrent_dict:
                    non_bomb['AR_IDs'] = AR_concurrent_dict
                    non_bomb['AR_storm_concurrent'] = True
                else:
                    non_bomb['AR_IDs'] = None
                    non_bomb['AR_storm_concurrent'] = False
                pressure_drop_events.append(non_bomb)

    return pressure_drop_events


In [31]:
# Configuration
# Coast to process; it selects the mask file below and is passed to findBombCyclones.
region = 'west'
mask_file = f"/Users/lilydonaldson/Downloads/examples/util/masks/{region}coast_mask_landocean.nc"
# Mask dataset handed to findBombCyclones for the in-mask checks.
mask_data = xr.open_dataset(mask_file)

# First and last year to process (the driver loop includes year_end).
year_start = 2020
year_end = 2020
num_gridboxes_to_check = 9 #x by x grid around center point

In [32]:
# Define the columns and their positions in the text file
columns = [
    'Year', 'Month', 'Day', 'Hour', 'Unused1', 'lat_proxy', 'lon_proxy', 'Unused2',
    'Sea_level_pressure', 'Unused3', 'Unused4', 'Unused5', 'Unused6',
    'Unused7', 'CSI', 'USI'
]

# Read the text file into a DataFrame.
# sep=r'\s+' is the documented replacement for the deprecated delim_whitespace=True.
data = pd.read_csv('/Users/lilydonaldson/Downloads/out_era5_output_2018_2020.txt', sep=r'\s+', names=columns)

# Convert proxies to actual values
data['Latitude'] = 90 - data['lat_proxy'] / 100
data['Longitude'] = data['lon_proxy'] / 100
# Shift every longitude above 90 down by 360 (vectorised; values <= 90 and NaN are unchanged).
# NOTE(review): the cut-off is 90 rather than the usual 180 - confirm this is intended.
data['Longitude'] = data['Longitude'].where(data['Longitude'] <= 90, data['Longitude'] - 360)
data['Sea_level_pressure'] = data['Sea_level_pressure'] / 1000

AR_dataset_path = "/Volumes/SSK Drive /ERA5_ARs/globalARcatalog_ERA5_1940-2023_v4.0.nc"
AR_dataset = xr.open_dataset(AR_dataset_path)

# One pickle per year is written here; create the folder up front so the
# first open(..., 'wb') cannot fail on a missing directory.
output_dir = f"/Users/lilydonaldson/Downloads/examples/data/merra2fronts/identified_bomb_cyclones/updatedJuly22/{region}Coast/"
os.makedirs(output_dir, exist_ok=True)

for year in range(year_start, year_end + 1):
    print(f"Processing {year}")
    # Just grab this year's rows from data
    current_year_data = data[data['Year'] == year]

    pressure_drop_events = findBombCyclones(current_year_data, mask_data, num_gridboxes_to_check, region, AR_dataset)

    output_path = os.path.join(output_dir, f'ERA5_ERA5ar_{region}_ETCs_{year}.pkl')
    with open(output_path, 'wb') as f:
        pickle.dump(pressure_drop_events, f)
    print(f"Saved {len(pressure_drop_events)} events for year {year} to {output_path}")

    print(f"ETCs identified within the Year {year} for {region}")
    print(f"Number of ETCs Identified {year}:")
    print(len(pressure_drop_events))

  data = pd.read_csv('/Users/lilydonaldson/Downloads/out_era5_output_2018_2020.txt', delim_whitespace=True, names=columns)


Processing 2020
# ids: 304


Processing Storms: 100%|███████████████████| 304/304 [00:06<00:00, 46.75storm/s]

Saved 0 events for year 2020 to /Users/lilydonaldson/Downloads/examples/data/merra2fronts/identified_bomb_cyclones/updatedJuly22/westCoast/ERA5_ERA5ar_west_ETCs_2020.pkl
ETCs identified within the Year 2020 for west
Number of ETCs Identified 2020:
0





In [10]:
# Define the path to your .pkl file
file_path = "/Users/lilydonaldson/Downloads/examples/data/merra2fronts/identified_bomb_cyclones/updatedJuly22/eastCoast/ERA5_ERA5ar_east_ETCs_2000.pkl"

# Load the .pkl file.
# NOTE: unpickling can execute arbitrary code; only load files written by this notebook.
with open(file_path, 'rb') as f:
    events = pickle.load(f)

# A year with no matching storms is saved as an empty list, so guard the preview.
if events:
    print(events[0])
else:
    print("No events in file")

# Check the number of events that are AR_storm_concurrent
ar_storm_concurrent_count = sum(event['AR_storm_concurrent'] for event in events if 'AR_storm_concurrent' in event)

print(f"Number of AR_storm_concurrent events: {ar_storm_concurrent_count}")

{'storm_id': '20000112120525025800', 'start_time': datetime.datetime(2000, 1, 12, 12, 0), 'end_time': datetime.datetime(2000, 1, 15, 0, 0), 'start_lat': 37.7, 'start_lon': -101.47000000000003, 'end_lat': 54.63, 'end_lon': -49.639999999999986, 'bomb_start_time': datetime.datetime(2000, 1, 12, 18, 0), 'bomb_end_time': datetime.datetime(2000, 1, 13, 18, 0), 'pressure_drop': 20.418000000000006, 'bomb_start_lat': 38.16, 'bomb_start_lon': -94.55000000000001, 'bomb_end_lat': 40.02, 'bomb_end_lon': -70.48000000000002, 'bomb': True, 'distance_between_timesteps': [610.3727054133809, 444.4756297167836, 479.3758784505471, 495.9863614167866, 679.539915905261, 533.0143957248911, 636.3894889080922, 354.26365543009297, 519.1016151923093, 502.85596987288307], 'region': 'northeast', 'AR_IDs': None, 'AR_storm_concurrent': False, 'AR_bomb_concurrent': 0, 'bomb_in_mask': True}
Number of AR_storm_concurrent events: 0
