In [5]:
import pandas as pd
import iris
import glob
import sys
import os
import time
import warnings
import pickle
from collections import OrderedDict

from Identify_Events_Functions import *
from Prepare_Data_Functions import *

pd.set_option('display.float_format', '{:.3f}'.format)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning, message=".*'+init=<authority>:<code>' syntax is deprecated.*")

######################################################
### Define which rainfall data we are looking for events in
######################################################
dataset_name = 'filtered_100'
dataset_path_pattern = '/nfs/a161/gy17m2a/PhD/datadir/NIMROD/5mins/OriginalFormat_1km/{year}/*'

######################################################
### Define data for finding the indepedent events at each gauge
######################################################

# Get Tb0 values at each gauge
tbo_vals = pd.read_csv('/nfs/a319/gy17m2a/PhD/datadir/RainGauge/interarrival_thresholds_CDD_noMissing.txt')
# Read in a sample cube for finding the location of gauge in grid
yr=2006
sample_cube = iris.load(f'/nfs/a161/gy17m2a/PhD/datadir/NIMROD/5mins/OriginalFormat_1km/{yr}/metoffice-c-band-rain-radar_uk_{yr}0602.nc')[0][1,:,:]

######################################################
### Get all the 5 minute data for one year, into one cube
# (if it already exists in a pickle file, then load it from there)
######################################################
general_filename = dataset_path_pattern.format(year=yr)
pickle_file_filepath = f"/nfs/a319/gy17m2a/PhD/datadir/cache/nimrod_5mins/unfiltered/WholeYear/cube_{yr}.pkl"

if os.path.exists(pickle_file_filepath):
    print("Pickle file exists, so loading that")
    full_year_cube = load_cube_from_picklefile(pickle_file_filepath)
else:
    print("Pickle file doesnt exist, so creating and then saving that")
    
    ### Get the data filepaths
    print(f"Loading data for year {yr}")
    
    # Create cube list
    cubes = load_files_to_cubelist(yr, general_filename)
    
    # Clean cubes of things which are problematic for concatenation
    cubes = clean_cubes(cubes)
    
    # Join them into one (with error handling to deal with times which are wrong)
    try:
        full_year_cube = cubes.concatenate_cube()
        print("Concatenation successful!")
    except Exception as e:
        print(f"Initial concatenation failed: {str(e)}")

        # If initial concatenation fails, remove problematic cubes and try again
        try:
            full_year_cube = remove_problematic_cubes(cubes)
            print("Concatenation successful after removing problematic cubes!")
        except RuntimeError as e:
            print(f"Concatenation failed after removing problematic cubes: {str(e)}")               
    save_cube_as_pickle_file(full_year_cube, pickle_file_filepath)

######################################################
# Find events at each gauge
######################################################
failed_gauges = []
for yr in range(2006,2020):
    print(yr)
    gauge_nums = range(250,300)
    # Function to process each gauge
    for gauge_num in gauge_nums:
        if not gauge_num in [423, 444, 827, 888]:
                print(f"gauge num is {gauge_num}")

                ######################################################
                ## Check if any files are missing, across the 3 filtering options
                # If there are: code will continue to run
                # If not: code will move to next gauge
                ######################################################
                for dataset_name in ['unfiltered', 'filtered_100', 'filtered_300']:
                    # Create a flag to record whether we are missing any of the files we need
                    missing_files = False
                    # Define directory filepath which will store results
                    base_dir = f"/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents/NIMROD_5mins/NIMROD_1km_{dataset_name}/{gauge_num}/WholeYear"
                    # Create the directory if it doesnt exist
                    if not os.path.isdir(base_dir):
                        os.makedirs(base_dir)
                    # Check if we are missing any of the files, and if so, change the flag to True
                    if not any(os.path.exists(f"{base_dir}/{duration}hrs_{yr}_v2_part0.csv") for duration in [0.5, 1, 2, 3, 6, 12, 24]):
                        missing_files = True

                # If we are missing some files then get the data for the grid cell, 
                if missing_files:
                    # Find the Tb0 and index of this gauge
                    Tb0, idx_2d = find_gauge_Tb0_and_location_in_grid(tbo_vals, gauge_num, sample_cube)

                    # Extract data for the specified indices
                    start= time.time()
                    one_location_cube = full_year_cube[:, idx_2d[0], idx_2d[1]]
                    data = one_location_cube.data
                    end=time.time()
                    print(f"Time to load data is {round(end-start,2)} seconds")

                    ##### Filter cube according to different options
                    # Find events with filtered cubes
                    filtering_dict = {1000000:'unfiltered', 300:'filtered_300',100:'filtered_100'}
                    for filtering_key, dataset_name in filtering_dict.items():
                        print(f"running for {dataset_name}")
                        # Create cube with filterings applied
                        cube = filtered_cube(one_location_cube,  filter_above=filtering_key)
                        print("reloading data")
                        data = cube.data
                        print(f"max value is {np.nanmax(cube.data)}")
                        # Convert to dataframe
                        df = create_df_with_gaps_filled_in(cube, data, time_resolution = 5)
                        # Search dataframe for events corresponding to durations
                        for duration in [0.5, 1, 2, 3, 6, 12, 24]:
                            base_dir = f"/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents/NIMROD_5mins/NIMROD_1km_{dataset_name}/{gauge_num}/WholeYear"

                            filename =  f"{base_dir}/{duration}hrs_{yr}_v2_part0.csv"
                            if not os.path.exists(filename):
                                print(f"Finding the AMAX for {duration}hr events for gauge {gauge_num} in year {yr} for {dataset_name}")
                                # Find events
                                events_v2 = search_for_valid_events(df, duration=duration, Tb0=Tb0)

                                # Save events to CSV
                                for num, event in enumerate(events_v2):
                                    if len(event) > 1:
                                            event.to_csv(f"{base_dir}/{duration}hrs_{yr}_v2_part{num}.csv")
                                            if event['precipitation (mm/hr)'].isna().any():
                                                print("NANs in this event")
                            else:
                                print(f"already exists{filename}")
                                pass   

    print(f"failed gauges are: {failed_gauges}")


Pickle file exists, so loading that
2006
gauge num is 250
gauge num is 251
gauge num is 252
gauge num is 253
gauge num is 254
gauge num is 255
gauge num is 256
gauge num is 257
gauge num is 258
gauge num is 259
gauge num is 260
gauge num is 261
gauge num is 262
gauge num is 263
gauge num is 264
gauge num is 265
gauge num is 266
gauge num is 267
gauge num is 268
gauge num is 269
gauge num is 270
gauge num is 271
gauge num is 272
gauge num is 273
gauge num is 274
gauge num is 275
gauge num is 276
gauge num is 277
gauge num is 278
gauge num is 279
gauge num is 280
gauge num is 281
gauge num is 282
gauge num is 283
gauge num is 284
gauge num is 285
gauge num is 286
gauge num is 287
gauge num is 288
gauge num is 289
gauge num is 290
gauge num is 291
gauge num is 292
gauge num is 293
gauge num is 294
gauge num is 295
gauge num is 296
gauge num is 297
gauge num is 298
gauge num is 299
failed gauges are: []
2007
gauge num is 250
gauge num is 251
gauge num is 252
gauge num is 253
gauge num is 2

(845, 823)
Time to load data is 510.38 seconds
running for unfiltered
reloading data
max value is 55.3125
Finding the AMAX for 0.5hr events for gauge 273 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 12.559896469116211
Finding the AMAX for 1hr events for gauge 273 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 12.559896469116211
Finding the AMAX for 2hr events for gauge 273 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 12.619792938232422
Finding the AMAX for 3hr events for gauge 273 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 12.619792938232422
Finding the AMAX for 6hr events for gauge 273 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 12.619792938232422
Finding the AMAX for 12hr events for gauge 273 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 12.651041984558105
Finding the AMAX for 24hr events for gauge 27

Finding the AMAX for 0.5hr events for gauge 275 in year 2008 for filtered_100
Event contains NAN, total event precip is 31.263023376464844
Event doesnt contain NAN, total event precip is 16.523439407348633
Finding the AMAX for 1hr events for gauge 275 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 16.523439407348633
Finding the AMAX for 2hr events for gauge 275 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 16.54947853088379
Finding the AMAX for 3hr events for gauge 275 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 16.523439407348633
Finding the AMAX for 6hr events for gauge 275 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 16.734375
Finding the AMAX for 12hr events for gauge 275 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 16.734375
Finding the AMAX for 24hr events for gauge 275 in year 2008 for filtered_100
Event doesnt contain

Finding the AMAX for 0.5hr events for gauge 278 in year 2008 for filtered_300
Event contains NAN, total event precip is 13.924479484558105
Event doesnt contain NAN, total event precip is 12.450521469116211
Finding the AMAX for 1hr events for gauge 278 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 12.450521469116211
Finding the AMAX for 2hr events for gauge 278 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 12.450521469116211
Finding the AMAX for 3hr events for gauge 278 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 12.450521469116211
Finding the AMAX for 6hr events for gauge 278 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 18.9609375
Finding the AMAX for 12hr events for gauge 278 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 18.99479103088379
Finding the AMAX for 24hr events for gauge 278 in year 2008 for filtered_300
Event does

(846, 862)
Time to load data is 483.03 seconds
running for unfiltered
reloading data
max value is 98.375
Finding the AMAX for 0.5hr events for gauge 281 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 30.304689407348633
Finding the AMAX for 1hr events for gauge 281 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 30.304689407348633
Finding the AMAX for 2hr events for gauge 281 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 30.304689407348633
Finding the AMAX for 3hr events for gauge 281 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 30.304689407348633
Finding the AMAX for 6hr events for gauge 281 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 34.47395706176758
Finding the AMAX for 12hr events for gauge 281 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 34.47395706176758
Finding the AMAX for 24hr events for gauge 281 i

Finding the AMAX for 0.5hr events for gauge 283 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 10.484375953674316
Finding the AMAX for 1hr events for gauge 283 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 10.484375953674316
Finding the AMAX for 2hr events for gauge 283 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 10.484375953674316
Finding the AMAX for 3hr events for gauge 283 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 10.567708969116211
Finding the AMAX for 6hr events for gauge 283 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 10.700521469116211
Finding the AMAX for 12hr events for gauge 283 in year 2008 for filtered_100
Event contains NAN, total event precip is 12.4296875
Event contains NAN, total event precip is 15.450521469116211
Event doesnt contain NAN, total event precip is 15.65625
Finding the AMAX for 24hr events fo

Finding the AMAX for 0.5hr events for gauge 286 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 13.940103530883789
Finding the AMAX for 1hr events for gauge 286 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 13.940103530883789
Finding the AMAX for 2hr events for gauge 286 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 13.940103530883789
Finding the AMAX for 3hr events for gauge 286 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 13.940103530883789
Finding the AMAX for 6hr events for gauge 286 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 13.940103530883789
Finding the AMAX for 12hr events for gauge 286 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 13.940103530883789
Finding the AMAX for 24hr events for gauge 286 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 19.5390625
run

(740, 918)
Time to load data is 379.86 seconds
running for unfiltered
reloading data
max value is 31.71875
Finding the AMAX for 0.5hr events for gauge 289 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 11.348958015441895
Finding the AMAX for 1hr events for gauge 289 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 11.348958015441895
Finding the AMAX for 2hr events for gauge 289 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 11.348958015441895
Finding the AMAX for 3hr events for gauge 289 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 11.348958015441895
Finding the AMAX for 6hr events for gauge 289 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 11.348958015441895
Finding the AMAX for 12hr events for gauge 289 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 18.994789123535156
Finding the AMAX for 24hr events for gauge 2

Finding the AMAX for 0.5hr events for gauge 291 in year 2008 for filtered_100
Event contains NAN, total event precip is 21.841144561767578
Event doesnt contain NAN, total event precip is 15.036458015441895
Finding the AMAX for 1hr events for gauge 291 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 15.036458015441895
Finding the AMAX for 2hr events for gauge 291 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 15.036458015441895
Finding the AMAX for 3hr events for gauge 291 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 15.036458015441895
Finding the AMAX for 6hr events for gauge 291 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 15.036458015441895
Finding the AMAX for 12hr events for gauge 291 in year 2008 for filtered_100
Event doesnt contain NAN, total event precip is 23.541667938232422
Finding the AMAX for 24hr events for gauge 291 in year 2008 for filtered_100
E

Finding the AMAX for 0.5hr events for gauge 294 in year 2008 for filtered_300
Event contains NAN, total event precip is 6.700520992279053
Event doesnt contain NAN, total event precip is 10.950520515441895
Finding the AMAX for 1hr events for gauge 294 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 10.950520515441895
Finding the AMAX for 2hr events for gauge 294 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 12.815103530883789
Finding the AMAX for 3hr events for gauge 294 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 19.65885353088379
Finding the AMAX for 6hr events for gauge 294 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 19.65885353088379
Finding the AMAX for 12hr events for gauge 294 in year 2008 for filtered_300
Event doesnt contain NAN, total event precip is 19.65885353088379
Finding the AMAX for 24hr events for gauge 294 in year 2008 for filtered_300
Event

gauge num is 297
(816, 866)
Time to load data is 702.15 seconds
running for unfiltered
reloading data
max value is 59.84375
Finding the AMAX for 0.5hr events for gauge 297 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 8.8125
Finding the AMAX for 1hr events for gauge 297 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 8.8125
Finding the AMAX for 2hr events for gauge 297 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 8.8125
Finding the AMAX for 3hr events for gauge 297 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 8.8125
Finding the AMAX for 6hr events for gauge 297 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 8.8125
Finding the AMAX for 12hr events for gauge 297 in year 2008 for unfiltered
Event doesnt contain NAN, total event precip is 17.25520896911621
Finding the AMAX for 24hr events for gauge 297 in year 2008 for unfiltered
Event contain

PermissionError: [Errno 13] Permission denied: b'/nfs/a161/gy17m2a/PhD/datadir/NIMROD/5mins/OriginalFormat_1km/2006/metoffice-c-band-rain-radar_uk_20060602.nc'