In [201]:
def find_rainfall_core(df, duration, n_largest=20):

    # Determine the length of the window based on the provided duration
    window_length = int(duration * 2)  # Assuming data is in 30-min intervals

    # Calculate the rolling sum of precipitation over the specified window length
    df['Rolling_Sum'] = df['precipitation (mm)'].rolling(window=window_length).sum()

    # Identify the indices of the top n_largest maximum rainfall accumulations
    top_indices = df['Rolling_Sum'].nlargest(n_largest).index.tolist()

    # Extract the rainfall cores corresponding to the top indices
    rainfall_cores = [df.iloc[idx:idx + window_length] for idx in top_indices]

    # Calculate total precipitation for each core and pair it with the core
    cores_with_totals = [(core, core['precipitation (mm)'].sum()) for core in rainfall_cores]

    # Sort the cores by total precipitation in descending order
    cores_with_totals.sort(key=lambda x: x[1], reverse=True)

    # Return only the cores sorted by their total precipitation
    sorted_rainfall_cores = [core for core, _ in cores_with_totals]

    return sorted_rainfall_cores


def expand_events(rainfall_cores, df, Tb0):
    rainfall_events_expanded = []

    for rainfall_core in rainfall_cores:
        rainfall_core_after_search1 = search1(df, rainfall_core)
        rainfall_core_after_search2 = search2(df, rainfall_core_after_search1)
        rainfall_core_after_search3 = search3(df, rainfall_core_after_search2, Tb0=Tb0)
        
        # Check if the event has significant rainfall
        if len(rainfall_core_after_search3[rainfall_core_after_search3['precipitation (mm/hr)'] > 0.1]) > 0:
            # Remove leading dry values
            start_index = 0
            while start_index < len(rainfall_core_after_search3) and rainfall_core_after_search3['precipitation (mm/hr)'].iloc[start_index] < 0.1:
                start_index += 1
            
            # Remove trailing dry values
            end_index = len(rainfall_core_after_search3) - 1
            while end_index >= 0 and rainfall_core_after_search3['precipitation (mm/hr)'].iloc[end_index] < 0.1:
                end_index -= 1

            # Slice the DataFrame to remove leading/trailing dry values
            if start_index <= end_index:  # Ensure valid slicing
                rainfall_core_after_search3 = rainfall_core_after_search3.iloc[start_index:end_index + 1]

            rainfall_events_expanded.append(rainfall_core_after_search3)

    return rainfall_events_expanded

    
def find_top_n_rainfall_events(df, duration, Tb0, n_largest):
    # Find potential rainfall cores
    rainfall_cores = find_rainfall_core(df, duration, 400)
    
    top_events = []
    
    # Convert Tb0 from hours to timesteps (30 min = 0.5 hour)
    Tb0_timesteps = Tb0 * 2  # Tb0 in timesteps

    for core in rainfall_cores:
        # Expand events using the defined search functions
        expanded_event = expand_events([core], df, Tb0)

        # Check for overlaps within the Tb0 threshold
        overlaps = False
        for existing_event in top_events:
            # Check if any indices in the expanded event are within Tb0 timesteps of existing event indices
            for idx in expanded_event[0].index:
                # Calculate the range of indices for the existing event
                existing_event_range = existing_event.index
                # Check if the index is within Tb0 timesteps of any index in the existing event
                if any(abs(idx - ex_idx) <= Tb0_timesteps for ex_idx in existing_event_range):
                    overlaps = True
                    break
            if overlaps:
                # print("overlap")
                break  # Exit if any overlap is found

        # Only append if there's no overlap
        if not overlaps and expanded_event:
            top_events.append(expanded_event[0])  # Only append if there's no overlap

        # Limit the results to the top 20
        if len(top_events) >= n_largest:
            break

    return top_events


In [171]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from warnings import simplefilter
warnings.filterwarnings("ignore", category =UserWarning,)

import gc
import pickle
from collections import OrderedDict
from pyproj import Transformer
import numpy as np
import pandas as pd
import iris
import glob
import sys
import os
import cartopy.crs as ccrs
import itertools
from scipy import spatial
import numpy.ma as ma
import tilemapbase
from math import cos, radians
import geopandas as gpd
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
from pyproj import Proj, transform
import time

sys.path.insert(1,'../')
from Identify_Events_Functions import *
from Prepare_Data_Functions import *

pd.set_option('display.float_format', '{:.3f}'.format)
warnings.filterwarnings("ignore", category=UserWarning)

yrs_range= '2060_2081'
em = "bb222"
timeperiod = 'Future'
print(em)

if timeperiod == 'Future':
    sample_yr=2066
elif timeperiod == 'Present':
    sample_yr=2006
    
# Get Tb0 values at each gauge
tbo_vals = pd.read_csv('/nfs/a319/gy17m2a/PhD/datadir/RainGauge/interarrival_thresholds_CDD_noMissing.txt')
sample_cube = iris.load(f'/nfs/a319/gy17m2a/PhD/datadir/UKCP18_every30mins/2.2km_bng/{yrs_range}/{em}/bng_{em}a.pr{sample_yr}01.nc')[0][1,:,:]

bb222


In [202]:
for gauge_num in range(0,1):
    if gauge_num not in [444, 827, 888]:
        
        Tb0, idx_2d = find_gauge_Tb0_and_location_in_grid(tbo_vals, gauge_num, sample_cube)
        print(gauge_num, Tb0)
        
        # Get data 
        pickle_file_filepath = f"/nfs/a319/gy17m2a/PhD/datadir/Gauge_Timeslices/{yrs_range}/{em}/gauge{gauge_num}_farFuture.nc"
        full_year_cube = iris.load(pickle_file_filepath)[0]
        data =full_year_cube.data

        ##### Filter cube according to different options
        # Convert to dataframe
        df = pd.DataFrame(data, columns=['precipitation (mm/hr)'])
        df['times'] = full_year_cube.coord('time').units.num2date(full_year_cube.coord('time').points)
        df['precipitation (mm)'] = df['precipitation (mm/hr)'] / 2   

        # Search dataframe for events corresponding to durations
        base_dir = f"/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/{timeperiod}/{em}/{gauge_num}/WholeYear"
        
        for duration in [0.5, 1, 2, 3, 6, 12, 24]:
            print(duration)
            # Find events
            top_40_events = find_top_n_rainfall_events(df, duration=duration, Tb0=Tb0, n_largest= 40)

            print(len(top_40_events))

0 7
0.5
40
1
40
2
40
3
40
6
40
12


IndexError: list index out of range

In [220]:
duration = 12
n_largest = 40
# Find potential rainfall cores
rainfall_cores = find_rainfall_core(df, duration, 400)

top_events = []

# Convert Tb0 from hours to timesteps (30 min = 0.5 hour)
Tb0_timesteps = Tb0 * 2  # Tb0 in timesteps

for number, core in enumerate(rainfall_cores):
    # Expand events using the defined search functions
    expanded_event = expand_events([core], df, Tb0)

    # Check for overlaps within the Tb0 threshold
    overlaps = False
    for existing_event in top_events:
        # Check if any indices in the expanded event are within Tb0 timesteps of existing event indices
        for idx in expanded_event[0].index:
            # Calculate the range of indices for the existing event
            existing_event_range = existing_event.index
            # Check if the index is within Tb0 timesteps of any index in the existing event
            if any(abs(idx - ex_idx) <= Tb0_timesteps for ex_idx in existing_event_range):
                overlaps = True
                break
        if overlaps:
            print(number, "overlap")
            break  # Exit if any overlap is found

    # Only append if there's no overlap
    if not overlaps and expanded_event:
        top_events.append(expanded_event[0])  # Only append if there's no overlap

    # Limit the results to the top 20
    if len(top_events) >= n_largest:
        break

1 overlap
2 overlap
3 overlap
6 overlap
7 overlap
8 overlap
9 overlap
10 overlap
11 overlap
13 overlap
14 overlap
15 overlap
16 overlap
17 overlap
18 overlap
19 overlap
20 overlap
21 overlap
23 overlap
24 overlap
25 overlap
26 overlap
27 overlap
28 overlap
29 overlap
30 overlap
31 overlap
32 overlap
33 overlap
34 overlap
35 overlap
36 overlap
37 overlap
39 overlap
40 overlap
42 overlap
44 overlap
45 overlap
47 overlap
48 overlap
49 overlap
51 overlap
52 overlap
53 overlap
54 overlap
56 overlap
57 overlap
58 overlap
59 overlap
60 overlap
63 overlap
64 overlap
66 overlap
67 overlap
68 overlap
69 overlap
71 overlap
72 overlap
73 overlap
74 overlap
75 overlap
76 overlap
77 overlap
78 overlap
79 overlap
80 overlap
81 overlap
82 overlap
83 overlap
85 overlap
86 overlap
87 overlap
88 overlap
89 overlap
90 overlap
92 overlap
93 overlap
94 overlap
95 overlap
96 overlap
97 overlap
98 overlap
99 overlap
100 overlap
101 overlap
102 overlap
103 overlap
104 overlap
105 overlap
106 overlap
108 overla

IndexError: list index out of range

In [225]:
core = rainfall_cores[350]
expanded_event = expand_events([core], df, Tb0)
expanded_event

[        precipitation (mm/hr)                times  precipitation (mm)  \
 268588                 18.800  2076-07-16 14:15:00               9.400   
 268589                 35.311  2076-07-16 14:45:00              17.656   
 268590                  3.660  2076-07-16 15:15:00               1.830   
 268591                  0.361  2076-07-16 15:45:00               0.181   
 268592                  0.061  2076-07-16 16:15:00               0.031   
 268593                  0.116  2076-07-16 16:45:00               0.058   
 
         Rolling_Sum  
 268588        9.400  
 268589       27.056  
 268590       28.886  
 268591       29.067  
 268592       29.097  
 268593       29.155  ]

In [223]:
core

Unnamed: 0,precipitation (mm/hr),times,precipitation (mm),Rolling_Sum
268605,0.0,2076-07-16 22:45:00,0.0,29.155
268606,0.0,2076-07-16 23:15:00,0.0,29.155
268607,0.0,2076-07-16 23:45:00,0.0,29.155
268608,0.0,2076-07-17 00:15:00,0.0,29.155
268609,0.0,2076-07-17 00:45:00,0.0,29.155
268610,0.0,2076-07-17 01:15:00,0.0,29.155
268611,0.0,2076-07-17 01:45:00,0.0,29.155
268612,0.0,2076-07-17 02:15:00,0.0,19.755
268613,0.0,2076-07-17 02:45:00,0.0,2.1
268614,0.0,2076-07-17 03:15:00,0.0,0.269


In [167]:
base_dir = f"/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents_40pergauge/UKCP18_30mins/{timeperiod}/{em}/{gauge_num}/WholeYear"




'/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/Future/bb222/1/WholeYear'