In [12]:
def find_rainfall_cores(df, duration, Tb0, top_n=20):
    # Determine the length of the window based on provided duration
    window_length = int(duration * 2)

    # Identify dry periods based on a precipitation threshold
    df['is_dry'] = df['precipitation (mm/hr)'] < 0.1

    # Calculate the rolling sum of precipitation over the specified window length
    df['Rolling_Sum'] = df['precipitation (mm)'].rolling(window=window_length).sum()

    # List to store the top N rainfall cores
    rainfall_cores = []

    # Keep track of indices already used to avoid overlap
    used_indices = set()

    for _ in range(top_n):
        # Find the index of the maximum rolling sum (i.e., core with the highest rainfall)
        max_rainfall_end_index = df.loc[~df.index.isin(used_indices), 'Rolling_Sum'].idxmax()
        
        if pd.isna(max_rainfall_end_index):
            break  # If no more max values are found, stop the loop

        # Convert index to a positional integer for slicing
        max_rainfall_end_pos = df.index.get_loc(max_rainfall_end_index)

        # Calculate the start position of the window, ensuring it doesn't go below the DataFrame's range
        max_rainfall_start_pos = max(0, max_rainfall_end_pos - window_length)

        # Extract the window of maximum rainfall from the DataFrame
        max_rainfall_window = df.iloc[max_rainfall_start_pos:max_rainfall_end_pos].copy()

        # Check for dry periods to see if it is split into two events
        max_rainfall_window['consecutive_dry'] = 0
        consecutive_dry_count = 0
        for i in range(len(max_rainfall_window)):
            if max_rainfall_window.iloc[i]['is_dry']:
                consecutive_dry_count += 1
            else:
                consecutive_dry_count = 0
            max_rainfall_window.iloc[i, max_rainfall_window.columns.get_loc('consecutive_dry')] = consecutive_dry_count

        # Check if the maximum consecutive dry period exceeds twice the Tb0 threshold
        if np.nanmax(max_rainfall_window['consecutive_dry']) > Tb0 * 2:
            print('2 events found due to long dry period.')
            split_index = max_rainfall_window[max_rainfall_window['consecutive_dry'] == (Tb0 * 2)].index[0]
            event1 = max_rainfall_window.loc[:split_index]
            event2 = max_rainfall_window.loc[split_index:]
            rainfall_cores.extend([event1, event2])
        else:
            rainfall_cores.append(max_rainfall_window)

        # Mark the indices of this window as used to avoid overlap
        used_indices.update(max_rainfall_window.index)

    return rainfall_cores[:top_n]

def expand_events(rainfall_cores, df, Tb0):
    rainfall_events_expanded = []

    for rainfall_core in rainfall_cores:
        rainfall_core_after_search1 = search1(df, rainfall_core)
        rainfall_core_after_search2 = search2(df, rainfall_core_after_search1)
        rainfall_core_after_search3 = search3(df, rainfall_core_after_search2, Tb0=Tb0)
        
        # Check if the event has significant rainfall
        if len(rainfall_core_after_search3[rainfall_core_after_search3['precipitation (mm/hr)'] > 0.1]) > 0:
            rainfall_events_expanded.append(rainfall_core_after_search3)

    return rainfall_events_expanded

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from warnings import simplefilter
warnings.filterwarnings("ignore", category =UserWarning,)

import gc
import pickle
from collections import OrderedDict
from pyproj import Transformer
import numpy as np
import pandas as pd
import iris
import glob
import sys
import os
import cartopy.crs as ccrs
import itertools
from scipy import spatial
import numpy.ma as ma
import tilemapbase
from math import cos, radians
import geopandas as gpd
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
from pyproj import Proj, transform
import time

sys.path.insert(1,'../')
from Identify_Events_Functions import *
from Prepare_Data_Functions import *

pd.set_option('display.float_format', '{:.3f}'.format)
warnings.filterwarnings("ignore", category=UserWarning)

yrs_range= '2060_2081'
em = "bb222"
timeperiod = 'Future'
print(em)

if timeperiod == 'Future':
    sample_yr=2066
elif timeperiod == 'Present':
    sample_yr=2006
    
# Get Tb0 values at each gauge
tbo_vals = pd.read_csv('/nfs/a319/gy17m2a/PhD/datadir/RainGauge/interarrival_thresholds_CDD_noMissing.txt')
sample_cube = iris.load(f'/nfs/a319/gy17m2a/PhD/datadir/UKCP18_every30mins/2.2km_bng/{yrs_range}/{em}/bng_{em}a.pr{sample_yr}01.nc')[0][1,:,:]
gauge_num=1

bb222


In [2]:
base_dir = f"/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/{timeperiod}/{em}/{gauge_num}/WholeYear"

In [3]:
# Get data 
pickle_file_filepath = f"/nfs/a319/gy17m2a/PhD/datadir/Gauge_Timeslices/{yrs_range}/{em}/gauge{gauge_num}_farFuture.nc"
full_year_cube = iris.load(pickle_file_filepath)[0]

Tb0, idx_2d = find_gauge_Tb0_and_location_in_grid(tbo_vals, gauge_num, sample_cube)

### This covers the full 20 years

In [4]:
# Get data 
pickle_file_filepath = f"/nfs/a319/gy17m2a/PhD/datadir/Gauge_Timeslices/{yrs_range}/{em}/gauge{gauge_num}_farFuture.nc"
full_year_cube = iris.load(pickle_file_filepath)[0]
data =full_year_cube.data

##### Filter cube according to different options
# Convert to dataframe
df = pd.DataFrame(data, columns=['precipitation (mm/hr)'])
df['times'] = full_year_cube.coord('time').units.num2date(full_year_cube.coord('time').points)
df['precipitation (mm)'] = df['precipitation (mm/hr)'] / 2   

In [17]:
def find_top_20_rainfall_cores(df, duration, Tb0):
    """
    Analyzes rainfall data to find the top 20 core periods of rainfall 
    based on maximum precipitation accumulation and checks for independence of the events.
    
    Args:
    df (pd.DataFrame): DataFrame containing precipitation data.
    duration (float): The duration over which to calculate the rolling sum, in hours.
    Tb0 (float): Threshold used to define a 'dry' period for splitting events.
    
    Returns:
    list: A list of DataFrames representing the top 20 rainfall cores.
    """

    ################
    # Calculate the rolling sum of precipitation over the specified duration
    ################

    # Determine the length of the window based on provided duration
    window_length = int(duration * 2)

    # Identify dry periods based on a precipitation threshold
    df['is_dry'] = df['precipitation (mm/hr)'] < 0.1

    # Calculate the rolling sum of precipitation
    df['Rolling_Sum'] = df['precipitation (mm)'].rolling(window=window_length).sum()

    ################
    # Identify the indices of the top 20 maxima in the rolling sum
    ################
    
    # Get the top 20 max indices
    top_20_indices = df['Rolling_Sum'].nlargest(20).index.tolist()

    rainfall_cores = []

    for max_rainfall_end_index in top_20_indices:
        # Ensure we have a valid index and the length of window
        if max_rainfall_end_index < window_length:
            continue  # Skip if the window goes out of bounds
        
        # Convert index to a positional integer for slicing
        max_rainfall_end_pos = df.index.get_loc(max_rainfall_end_index)

        # Calculate the start position of the window, ensuring it doesn't go below the DataFrame's range
        max_rainfall_start_pos = max(0, max_rainfall_end_pos - window_length)

        # Extract the window of maximum rainfall from the DataFrame
        max_rainfall_window = df.iloc[max_rainfall_start_pos:max_rainfall_end_pos].copy()

        ################
        # Check whether this is one independent event, or two
        ################
        # Initialize a column to keep track of consecutive dry periods within the window
        max_rainfall_window['consecutive_dry'] = 0

        # Iterate through the rows of the extracted window to count consecutive dry periods
        consecutive_dry_count = 0
        for i in range(len(max_rainfall_window)):
            if max_rainfall_window.iloc[i]['is_dry']:
                consecutive_dry_count += 1
            else:
                consecutive_dry_count = 0
            max_rainfall_window.iloc[i, max_rainfall_window.columns.get_loc('consecutive_dry')] = consecutive_dry_count

        # Check if the maximum consecutive dry period exceeds twice the Tb0 threshold
        if np.nanmax(max_rainfall_window['consecutive_dry']) > Tb0 * 2:
            print('2 events found')
            split_index = max_rainfall_window[max_rainfall_window['consecutive_dry'] == (Tb0 * 2)].index[0]
            event1 = max_rainfall_window.loc[:split_index]
            event2 = max_rainfall_window.loc[split_index:]
            rainfall_cores.append(event1)
            rainfall_cores.append(event2)
        else:
            rainfall_cores.append(max_rainfall_window)

    return rainfall_cores[:20]  # Ensure we return only the top 20 cores


In [18]:
rainfall_cores = find_top_20_rainfall_cores(df, duration=duration, Tb0=Tb0)
rainfall_cores

[        precipitation (mm/hr)                times  precipitation (mm)  \
 306301                  6.841  2078-09-22 06:45:00               3.420   
 306302                 40.820  2078-09-22 07:15:00              20.410   
 
         is_dry  Rolling_Sum  consecutive_dry  
 306301   False        3.502                0  
 306302   False       23.831                0  ,
         precipitation (mm/hr)                times  precipitation (mm)  \
 131804                  0.093  2068-08-16 22:15:00               0.047   
 131805                 44.732  2068-08-16 22:45:00              22.366   
 
         is_dry  Rolling_Sum  consecutive_dry  
 131804    True        0.150                1  
 131805   False       22.413                0  ,
         precipitation (mm/hr)                times  precipitation (mm)  \
 306300                  0.163  2078-09-22 06:15:00               0.081   
 306301                  6.841  2078-09-22 06:45:00               3.420   
 
         is_dry  Rolling_Sum 

In [19]:
rainfall_cores[1]

Unnamed: 0,precipitation (mm/hr),times,precipitation (mm),is_dry,Rolling_Sum,consecutive_dry
131804,0.093,2068-08-16 22:15:00,0.047,True,0.15,1
131805,44.732,2068-08-16 22:45:00,22.366,False,22.413,0
