In [1]:
def remove_leading_and_trailing_zeroes(df, threshold = 0.005):
    """Trim the below-threshold timesteps from both ends of an event.

    Parameters
    ----------
    df : pandas.DataFrame
        Event data containing a 'precipitation (mm)' column.
    threshold : float, optional
        Values greater than or equal to this are treated as precipitation.

    Returns
    -------
    pandas.DataFrame
        The rows from the first to the last at-or-above-threshold value
        (both inclusive), re-indexed from 0. Below-threshold values that lie
        between those two rows are kept. If no value reaches the threshold a
        message is printed and an empty frame with the same columns is returned.
    """
    # Index labels of every timestep at or above the threshold (computed once)
    wet_labels = df.index[(df['precipitation (mm)'] >= threshold).values]

    # Handle cases where no values are above the threshold. This path used to
    # fall through to the return statement with the result name unbound, which
    # raised UnboundLocalError instead of returning anything.
    if len(wet_labels) == 0:
        print("No events found with precipitation >= threshold.")
        return df.iloc[0:0].reset_index(drop=True)

    # .loc slicing is label-based and includes both endpoints
    return df.loc[wet_labels.min():wet_labels.max()].reset_index(drop=True)

def process_events_alltogether(home_dir, time_period, ems, tb0_vals, save_dir):
    """Collect every event for the given ensemble members, merging duplicate events.

    For each ensemble member and gauge, every '.csv' file in the gauge's
    ``EventSet`` directory is read, its times are passed through
    ``adjust_feb_dates``, leading/trailing below-threshold timesteps are removed
    and a characteristics dictionary is built. An event whose 'duration', 'year',
    'gauge_num', 'month', 'Volume' and 'max_intensity' all equal those of an
    event already stored is treated as a duplicate: it is not stored again, and
    its 'dur_for_which_this_is_amax' value is appended to the stored event's
    (which becomes a list).

    After each ensemble member the three collections are pickled under
    ``save_dir``.

    Relies on ``os``, ``np``, ``pd``, ``pickle`` and the project helpers
    ``read_event``, ``adjust_feb_dates``, ``create_event_characteristics_dict``,
    ``get_dur_for_which_this_is_amax`` and ``create_profiles_dict`` being in scope
    when called.

    Parameters
    ----------
    home_dir : str
        Root directory containing ``ProcessedData/IndependentEvents/...``. It is
        joined by plain string concatenation, so it must end with a path separator.
    time_period : str
        Sub-directory name for the time period (the notebook passes 'Present').
    ems : iterable of str
        Ensemble member identifiers (sub-directory names).
    tb0_vals : pandas.DataFrame
        Table with a 'within_area' column, looked up positionally by gauge number.
    save_dir : str
        Root directory for the pickled outputs; must also end with a path separator.

    Returns
    -------
    tuple
        ``(events_dict, event_props_ls, event_profiles_dict)``: event DataFrames
        keyed by "em, gauge_num, event_num", the list of characteristics
        dictionaries, and the profile dictionaries under the same keys.
    """
    events_dict = {}
    event_props_ls = []
    event_profiles_dict = {}

    # Gauge numbers that are skipped
    excluded_gauges = {444, 827, 888}

    # Properties which together identify an event.
    # NOTE(review): 'em' is not one of them, so an event from a different ensemble
    # member with identical values would be merged as a duplicate - confirm intended.
    keys_to_check = ['duration', 'year', 'gauge_num', 'month', 'Volume', 'max_intensity']

    # Identity tuple -> stored characteristics dict (the same object that sits in
    # event_props_ls). Replaces a scan of the whole list for every event file.
    # Assumes the identifying values are hashable scalars.
    props_by_identity = {}

    for em in ems:
        print(em)
        for gauge_num in range(0, 1294):
            if gauge_num in excluded_gauges:
                continue
            if gauge_num % 100 == 0:
                print(f"Processing gauge {gauge_num}")
            indy_events_fp = home_dir + f"ProcessedData/IndependentEvents/UKCP18_30mins/{time_period}/{em}/{gauge_num}/WholeYear/EventSet/"

            files = [f for f in os.listdir(indy_events_fp) if f.endswith('.csv')]
            files = np.sort(files)

            # event_num counts every file, including the ones skipped below
            for event_num, file in enumerate(files):
                fp = indy_events_fp + f"{file}"
                if '2080' in fp:
                    continue

                # Get event
                this_event = read_event(gauge_num, fp)

                # Get times and precipitation values
                event_times = this_event['times']
                event_precip = this_event['precipitation (mm)']

                # Apply the function to adjust the dates in the 'times' column
                event_times_fixed = event_times.apply(adjust_feb_dates)

                # Create the DataFrame with corrected times
                event_df = pd.DataFrame({'precipitation (mm)': event_precip, 'times': event_times_fixed})
                # Remove leading and trailing zeroes
                event_df = remove_leading_and_trailing_zeroes(event_df)
                # Create characteristics dictionary
                event_props = create_event_characteristics_dict(event_df)

                # Add the duration
                event_props['dur_for_which_this_is_amax'] = get_dur_for_which_this_is_amax(fp)
                # Add gauge number and ensemble member
                event_props['gauge_num'] = gauge_num
                event_props['area'] = tb0_vals.iloc[gauge_num]['within_area']
                event_props['em'] = em
                event_props['filename'] = file

                # Look for an already-stored event with the same identifying values
                identity = tuple(event_props[key] for key in keys_to_check)
                matched_dict = props_by_identity.get(identity)

                if matched_dict is not None:
                    # Duplicate: record the extra duration on the stored event. The
                    # stored dict is mutated in place, so the list entry is updated too.
                    new_value = event_props['dur_for_which_this_is_amax']
                    existing_value = matched_dict.get('dur_for_which_this_is_amax', '')
                    if isinstance(existing_value, list):
                        existing_value.append(new_value)
                    else:
                        # First duplicate: turn the single value into a list
                        existing_value = [existing_value, new_value]
                    matched_dict['dur_for_which_this_is_amax'] = existing_value
                else:
                    # New event: store it and index it for later duplicate checks
                    props_by_identity[identity] = event_props
                    event_key = f"{em}, {gauge_num}, {event_num}"
                    events_dict[event_key] = event_df
                    event_props_ls.append(event_props)
                    event_profiles_dict[event_key] = create_profiles_dict(event_df)

        print(f"Finished {em}")

        # NOTE(review): the collections are created once, before the ensemble-member
        # loop, so each per-member file also contains every earlier member's events.
        # Confirm whether cumulative checkpoints or per-member files are wanted.
        with open(save_dir + f"ProcessedData/AMAX_Events/UKCP18_30mins/{time_period}/events_dict_{em}_newest.pickle", 'wb') as handle:
            pickle.dump(events_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

        with open(save_dir + f"ProcessedData/AMAX_Events/UKCP18_30mins/{time_period}/event_profiles_dict_{em}_newest.pickle", 'wb') as handle:
            pickle.dump(event_profiles_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

        with open(save_dir + f"ProcessedData/AMAX_Events/UKCP18_30mins/{time_period}/event_props_dict_{em}_newest.pickle", 'wb') as handle:
            pickle.dump(event_props_ls, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return events_dict, event_props_ls, event_profiles_dict

In [2]:
import os
import numpy as np
import re
import pickle
import sys

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

from ProcessEventsFunctions import *
sys.path.insert(1, 'Old')
from Steef_Functions import *

In [3]:
# Root directory from which the rain-gauge table is read and under which results are saved
home_dir = '/nfs/a319/gy17m2a/PhD/'
# Second root directory, passed to the processing functions as the location of the event data
home_dir2 = '/nfs/a161/gy17m2a/PhD/'

In [4]:
# Labels for classes 1-5. NOTE(review): presumably F = front-loaded, C = centred,
# B = back-loaded - confirm. Not referenced by the other cells shown in this notebook.
quintile_mapping = {1: 'F2', 2: 'F1', 3: 'C', 4: 'B1', 5: 'B2'}
# Coarser three-class version of the labels
quintile_mapping_thirds = {1: 'F', 2: 'C', 3: 'B'}

In [5]:
# Load the per-gauge table and check which of the areas each point falls within
gauge_table_fp = home_dir + 'datadir/RainGauge/interarrival_thresholds_CDD_noMissing.txt'
area_names = ['NW', 'NE', 'ME', 'SE', 'SW']
tbo_vals = check_for_gauge_in_areas(pd.read_csv(gauge_table_fp), home_dir, area_names)

# Gauges labelled with two areas are assigned to a single one
for combined_label, single_label in [('NW, C', 'NW'), ('ME, SE', 'ME')]:
    has_combined_label = tbo_vals['within_area'] == combined_label
    tbo_vals.loc[has_combined_label, 'within_area'] = single_label

### Define ensemble members for present and future

In [6]:
# Ensemble member(s) processed for the 'Present' time period
ems_present = ['bc010']
# Ensemble members passed to the processing functions for the 'Future' time period
ems_future = ['bb189','bb192', 'bb195', 'bb198', 'bb201', 'bb204','bb208' ,'bb211','bb216', 'bb219','bb222','bb225']

### Get events (considering one set of AMAX-producing events, with duplicates deleted)

In [7]:
# Collect all events for the 'Present' period: event files are read from under home_dir2
# and the per-ensemble-member pickles are written under home_dir (passed as save_dir).
events_dict_present, event_props_dict_present, event_profiles_dict_present = process_events_alltogether(home_dir2, 'Present',ems_present, tbo_vals, home_dir)
# The 'Future' run is currently disabled. process_events_alltogether takes save_dir as a
# fifth argument, so it is included here for when the call is re-enabled:
# events_dict_future, event_props_dict_future, event_profiles_dict_future = process_events_alltogether(home_dir2, 'Future', ems_future, tbo_vals, home_dir)

bc010
Processing gauge 0
Processing gauge 100
Processing gauge 200
Processing gauge 300
Processing gauge 400
Processing gauge 500
Processing gauge 600
Processing gauge 700
Processing gauge 800
Processing gauge 900
Processing gauge 1000
Processing gauge 1100
Processing gauge 1200
Finished bc010


In [47]:
# Save the combined results returned by process_events_alltogether.
# Every object is looked up before any file is opened: opening with 'wb' truncates the
# target, so a NameError raised after the open would leave an emptied pickle behind.
amax_events_root = home_dir + "ProcessedData/AMAX_Events/UKCP18_30mins/"

pickle_targets = [
    ("Present/events_dict_present.pickle", events_dict_present),
    # NOTE(review): this file name has no '.pickle' extension, unlike the others.
    # Kept as-is so existing readers of the file keep working - confirm and rename.
    ("Present/event_profiles_dict_present", event_profiles_dict_present),
    ("Present/event_props_dict_present.pickle", event_props_dict_present),
]

# The 'Future' results only exist if the 'Future' call to process_events_alltogether
# has been run; skip them (with a message) rather than failing part-way through.
try:
    pickle_targets += [
        ("Future/events_dict_future.pickle", events_dict_future),
        ("Future/event_profiles_dict_future.pickle", event_profiles_dict_future),
        ("Future/event_props_dict_future.pickle", event_props_dict_future),
    ]
except NameError:
    print("Future results are not defined - skipping the Future pickles.")

for rel_path, payload in pickle_targets:
    with open(amax_events_root + rel_path, 'wb') as handle:
        pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Get events for each duration (the same event can appear under more than one duration)
By "duration" here we mean the duration with which the AMAX is associated, rather than the actual duration of the event.

In [None]:
# List of desired durations, given as strings (presumably hours - TODO confirm)
valid_durations = ["0.5", "1", "2", "3", "6", "12", "24"]

# Process events for the 'Present' period; the 'Future' run is currently disabled
results_present = process_events_by_duration(home_dir2, 'Present', valid_durations, ems_present, tbo_vals)
# The directory is the first argument in the call above, so it is included here too:
# results_future = process_events_by_duration(home_dir2, 'Future', valid_durations, ems_future, tbo_vals)

In [18]:
# Unpack the three collections returned by process_events_by_duration
# (the props collection is indexed by duration further down the notebook)
dur_present_events_dict, dur_present_event_props_dict, dur_present_event_profiles_dict = results_present
# dur_future_events_dict, dur_future_event_props_dict, dur_future_event_profiles_dict = results_future

In [15]:
# Persist the per-duration 'Present' results as a single pickle
results_each_dur_present_fp = home_dir + "ProcessedData/AMAX_Events/UKCP18_30mins/Present/results_each_dur_present.pickle"
with open(results_each_dur_present_fp, 'wb') as handle:
    pickle.dump(results_present, handle, protocol=pickle.HIGHEST_PROTOCOL)
# The matching 'Future' dump is disabled along with the 'Future' run:
# with open(home_dir + f"ProcessedData/AMAX_Events/UKCP18_30mins/Future/results_each_dur_future.pickle", 'wb') as handle:
#     pickle.dump(results_future, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Do we have the same number of results for each duration?
I believe that longer durations have more events because of compound annual maxima

In [19]:
# Print how many entries are stored under each duration
for duration in valid_durations:
    props_for_duration = dur_present_event_props_dict[duration]
    n_entries = len(props_for_duration.keys())
    print(duration, n_entries)

0.5 2964
1 2964
2 2964
3 2964
6 2966
12 3022
24 3449


In [33]:
# Duration categories, passed in place of the individual durations used earlier
duration_bins = ['<4hr', '4-12hr', '12hr+']

# Process events for both time periods using the duration categories
# NOTE(review): the stored output of this cell lists members bc005, bc006, bc007 and bc009,
# which are not in ems_present / ems_future as defined in this notebook - re-run to confirm.
results_present_dur_categories_simple = process_events_by_duration(home_dir2, 'Present', duration_bins, ems_present, tbo_vals)
results_future_dur_categories_simple = process_events_by_duration(home_dir2, 'Future', duration_bins, ems_future, tbo_vals)

bc005
Processing gauge 0
Processing gauge 100
Processing gauge 200
Processing gauge 300
Processing gauge 400
Processing gauge 500
Processing gauge 600
Processing gauge 700
Processing gauge 800
Processing gauge 900
Processing gauge 1000
Processing gauge 1100
Processing gauge 1200
bc006
Processing gauge 0
Processing gauge 100
Processing gauge 200
Processing gauge 300
Processing gauge 400
Processing gauge 500
Processing gauge 600
Processing gauge 700
Processing gauge 800
Processing gauge 900
Processing gauge 1000
Processing gauge 1100
Processing gauge 1200
bc007
Processing gauge 0
Processing gauge 100
Processing gauge 200
Processing gauge 300
Processing gauge 400
Processing gauge 500
Processing gauge 600
Processing gauge 700
Processing gauge 800
Processing gauge 900
Processing gauge 1000
Processing gauge 1100
Processing gauge 1200
bc009
Processing gauge 0
Processing gauge 100
Processing gauge 200
Processing gauge 300
Processing gauge 400
Processing gauge 500
Processing gauge 600
Processin

In [36]:
# Persist the duration-category results for each time period
amax_events_dir = home_dir + "ProcessedData/AMAX_Events/UKCP18_30mins/"

present_categories_fp = amax_events_dir + "Present/results_present_dur_categories_simple.pickle"
with open(present_categories_fp, 'wb') as handle:
    pickle.dump(results_present_dur_categories_simple, handle, protocol=pickle.HIGHEST_PROTOCOL)

future_categories_fp = amax_events_dir + "Future/results_future_dur_categories_simple.pickle"
with open(future_categories_fp, 'wb') as handle:
    pickle.dump(results_future_dur_categories_simple, handle, protocol=pickle.HIGHEST_PROTOCOL)