In [1]:
## Ran for everything now, excluding the gauges that we don't want

# Create a set of dimensionless profiles
Read in all of the events, for all durations, for all gauges, for all ensemble members.  
Convert them to dimensionless profiles, with 12 values between 0 and 1.  
Each value is a dimensionless, cumulative rainfall value (cumulative rainfall at this timestep, normalised by the total event rainfall):
- 0 means no rainfall has occurred, and 
- 1 means the total event rainfall has been reached.  

If there are fewer than 12 values, the missing values are filled in by interpolation.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
import pandas as pd
import re
from datetime import datetime

from Create_Profiles_Functions import *

In [2]:
# Indices of the gauges to process: 0 to 1262 inclusive
gauge_nums = range(1263)

# NIMROD
### Make profiles

In [216]:
# Gauge indices to loop over (0 to 1262 inclusive)
gauge_nums = range(1263)

for nimrod_option in ["NIMROD_1km_filtered_100"]:
    print(nimrod_option)

    # Result lists: one entry is appended to each for every event that is kept.
    # dimensionless_profiles receives the same normalized_rainfall values as
    # nimrod_normalised_rainfall.
    nimrod_profiles, nimrod_normalised_rainfall, dimensionless_profiles = [], [], []
    durations_for_nimrod_profiles, real_durations_for_nimrod_profiles = [], []
    volumes_for_nimrod_profiles = []
    max_quintiles, seasons = [], []
    # These two are filled below, but are not passed to check_for_nan or pickled in this cell
    files_ls, gauge_nums_ls = [], []

    for gauge_num in gauge_nums:
        # Skip the gauges that didn't work for finding events
        if gauge_num in (423, 444, 827, 888):
            continue
        # Skip the gauges near windfarms
        if gauge_num in (27, 36, 57, 61, 75, 97, 101, 106, 120, 132, 190, 204, 239, 285, 348, 376):
            continue

        # Progress indicator: print every 100th gauge number
        if gauge_num % 100 == 0:
            print(gauge_num)

        # Sorted names of all the event CSVs for this gauge
        files = np.sort(
            [f for f in os.listdir(f"../../../ProcessedData/IndependentEvents/NIMROD/{nimrod_option}/{gauge_num}/WholeYear") if f.endswith('.csv')]
        )

        for file in files:
            fp = f"../../../ProcessedData/IndependentEvents/NIMROD/{nimrod_option}/{gauge_num}/WholeYear/{file}"
            (interpolated_rainfall, normalized_rainfall, duration, real_duration,
             precipitation_sum, max_quintile, season) = process_file(gauge_num, fp, file)

            # Discard the event if the profile, duration or rainfall sum is missing
            if interpolated_rainfall is None or duration is None or precipitation_sum is None:
                continue

            nimrod_profiles.append(interpolated_rainfall)
            nimrod_normalised_rainfall.append(normalized_rainfall)
            dimensionless_profiles.append(normalized_rainfall)
            durations_for_nimrod_profiles.append(duration)
            real_durations_for_nimrod_profiles.append(real_duration)
            volumes_for_nimrod_profiles.append(precipitation_sum)
            max_quintiles.append(max_quintile)
            seasons.append(season)
            files_ls.append(file)
            gauge_nums_ls.append(gauge_num)

    ## Remove profiles containing NANs
    (nimrod_profiles, nimrod_normalised_rainfall, dimensionless_profiles,
     durations_for_nimrod_profiles, real_durations_for_nimrod_profiles,
     volumes_for_nimrod_profiles, max_quintiles, seasons) = check_for_nan(
        nimrod_profiles, nimrod_normalised_rainfall, dimensionless_profiles,
        durations_for_nimrod_profiles, real_durations_for_nimrod_profiles,
        volumes_for_nimrod_profiles, max_quintiles, seasons)

    # Suffixes for the pickle filenames, in the same order as the lists paired with them below
    names = ['profiles', 'normalised_rainfall', "dimensionless_profiles", 'durations_for_profiles',
             'real_durations_for_profiles', 'volumes_for_profiles', 'max_quintiles', "seasons"]
    lists_to_save = [nimrod_profiles, nimrod_normalised_rainfall, dimensionless_profiles,
                     durations_for_nimrod_profiles, real_durations_for_nimrod_profiles,
                     volumes_for_nimrod_profiles, max_quintiles, seasons]

    # Pickle each list to its own file
    for file_name, file in zip(names, lists_to_save):
        cache_filepath = f"/nfs/a319/gy17m2a/PhD/ProcessedData/Profiles/NIMROD/WholeYear/{nimrod_option}_{file_name}.pkl"
        with open(cache_filepath, 'wb') as f:
            print(cache_filepath)
            pickle.dump(file, f)

NIMROD_1km_filtered_100
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


# NIMROD 5 mins

In [15]:
for nimrod_option in ["NIMROD_1km_unfiltered"]:

    # Per-event results for the 'good' events only (those for which
    # remove_events_with_problems returns a dataframe). Each key is also used
    # in the name of the pickle file that its list is saved to.
    dict_of_things_im_storing = {
        'good_raw_events': [],
        'good_raw_events_seasons': [],
        'good_trimmed_events': [],
        'good_trimmed_events_durations': [],
        'good_normalised_events': [],
        'good_normalised_interpolated_events': [],
        'good_normalised_interpolated_events_max_quintiles': [],
    }

    for gauge_num in range(11,100):

        # Sorted names of all the event CSVs for this gauge
        files = np.sort(
            [f for f in os.listdir(f"/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents/NIMROD_5mins/{nimrod_option}/{gauge_num}/WholeYear") if f.endswith('.csv')]
        )

        # Go through each event
        for file in files:
            # Construct the path to the file
            fp = f"/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents/NIMROD_5mins/{nimrod_option}/{gauge_num}/WholeYear/{file}"
            # Read in event
            this_event = read_event(gauge_num, fp)
            # Trim leading and trailing zeroes, then check the trimmed event for 'problems':
            # the result is a dataframe if there are none, and None if there are
            trimmed_event = remove_events_with_problems(
                remove_leading_and_trailing_zeroes(this_event), verbose=True
            )

            # Nothing is stored for events with problems
            if trimmed_event is None:
                continue

            # Create normalised version of event
            normalized_time, normalized_rainfall = create_normalised_event(trimmed_event)
            # Create interpolated, normalised version of event (with 12 data points)
            normalized_interpolated_rainfall = interpolate_and_bin(normalized_time, normalized_rainfall)
            # Find the fifth OF THE INTERPOLATED, NORMALISED EVENT with the maximum amount of rainfall
            max_quintile = find_fifth_with_most_rain(normalized_interpolated_rainfall)

            # Add to lists of events with no problems
            dict_of_things_im_storing['good_raw_events'].append(this_event)
            dict_of_things_im_storing['good_raw_events_seasons'].append(get_season(this_event['times'][0]))
            dict_of_things_im_storing['good_trimmed_events'].append(trimmed_event)
            # NOTE(review): len/2 gives hours only if each row is a 30-minute timestep;
            # the input directory is named NIMROD_5mins - confirm the timestep of this data
            dict_of_things_im_storing['good_trimmed_events_durations'].append(len(trimmed_event)/2)
            dict_of_things_im_storing['good_normalised_events'].append(normalized_rainfall)
            dict_of_things_im_storing['good_normalised_interpolated_events'].append(normalized_interpolated_rainfall)
            dict_of_things_im_storing['good_normalised_interpolated_events_max_quintiles'].append(max_quintile)

    # Pickle each list to its own file, named after its dictionary key
    for file_name, stored_list in dict_of_things_im_storing.items():
        print(file_name)
        cache_filepath = f"/nfs/a319/gy17m2a/PhD/ProcessedData/Profiles/NIMROD_5mins/WholeYear/{nimrod_option}_{file_name}.pkl"
        with open(cache_filepath, 'wb') as f:
            pickle.dump(stored_list, f)

Contains NANs
Too short to be an event
Doesn't contain more than 1 value which isn't 0
Contains NANs
Doesn't contain more than 1 value which isn't 0
Contains NANs
Doesn't contain more than 1 value which isn't 0
Contains NANs
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Contains NANs
Doesn't contain more than 1 value which isn't 0
Contains NANs
Contains NANs
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Contains NANs
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Contains NANs
Contains NANs
Doesn't contain more than 1 value which isn't 0
Too short to be an event
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which is

Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Contains NANs
Too short to be an event
Doesn't contain more than 1 value which isn't 0
Contains NANs
Doesn't contain more than 1 value which isn't 0
Contains NANs
Doesn't contain more than 1 value which isn't 0
Contains NANs
Too short to be an event
Too short to be an event
Too short to be an event
Too short to be an event
Too short to be an event
Too short to be an event
Too short to be an event
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Contains NANs
Contains NANs
Doesn't contain more than 1 value which isn't 0
Contains NANs
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Too short to be an event
Doesn't

# UKCP18
### Make and pickle profiles

In [66]:
def remove_events_with_problems(df, verbose=True):
    """Return the event dataframe if it passes all checks, otherwise None.

    The checks are applied in this order, and the first one that fails decides
    the message that is printed:
      1. the event has at least 2 rows;
      2. no value of 'time_since_last_minutes' is greater than 30;
      3. more than 2 values of 'precipitation (mm/hr)' are greater than 0;
      4. 'precipitation (mm/hr)' contains no NaNs.

    NOTE(review): this notebook also does `from Create_Profiles_Functions import *`,
    which presumably provides a function with this name; once this cell has been
    run, this definition is the one that later cells call.

    Parameters
    ----------
    df : pandas.DataFrame
        Event with 'time_since_last_minutes' and 'precipitation (mm/hr)' columns.
    verbose : bool, default True
        If True, print the reason why an event is rejected.

    Returns
    -------
    pandas.DataFrame or None
        `df` itself (not a copy) if every check passes, otherwise None.
    """
    if len(df) < 2:
        if verbose:
            print("Too short to be an event")
        return None

    if (df['time_since_last_minutes'] > 30).any():
        if verbose:
            print("More than 30 minute gap between each time step")
        return None

    # NaN > 0 is False, so NaN values are not counted here
    n_positive = int((df['precipitation (mm/hr)'] > 0).sum())
    if n_positive <= 2:
        if verbose:
            # The message previously said "more than 1 value", which did not
            # match the threshold that is actually applied (more than 2)
            print("Doesn't contain more than 2 values which aren't 0")
        return None

    if df['precipitation (mm/hr)'].isna().any():
        if verbose:
            print("Contains NANs")
        return None

    return df

In [68]:
# Dry run of the event checks for a single gauge: nothing is stored or saved,
# the reasons for rejecting events are just printed by remove_events_with_problems.

# Ensemble member to check. Defined here so that this cell doesn't depend on a
# value of `em` left over from running another cell (on a fresh kernel `em`
# would otherwise be undefined at this point).
em = "bc005"
gauge_num = 0

# Sorted names of all the event CSVs for this gauge
files = [f for f in os.listdir(f"../../../ProcessedData/IndependentEvents/UKCP18_30mins/{em}/{gauge_num}/") if f.endswith('.csv')]
files = np.sort(files)

# Go through each event
for file in files:
    # Construct the path to the file
    fp = f"../../../ProcessedData/IndependentEvents/UKCP18_30mins/{em}/{gauge_num}/{file}"
    # Read in event
    this_event = read_event(gauge_num, fp)
    # Trim leading and trailing zeroes
    trimmed_event = remove_leading_and_trailing_zeroes(this_event)

    # Check trimmed event for 'problems', if there are none, return a dataframe, if there are, return None
    real_trimmed_event = remove_events_with_problems(trimmed_event, verbose=True)

    # If we still have a dataframe, check that the rest of the processing runs for it
    if real_trimmed_event is not None:

        # Create normalised version of event
        normalized_time, normalized_rainfall = create_normalised_event(real_trimmed_event)
        # Create interpolated, normalised version of event (with 12 data points)
        normalized_interpolated_rainfall = interpolate_and_bin(normalized_time, normalized_rainfall)
        # Find the fifth OF THE INTERPOLATED, NORMALISED EVENT with the maximum amount of rainfall
        max_quintile = find_fifth_with_most_rain(normalized_interpolated_rainfall)


Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0


In [70]:
for em in ["bc005"]:

    # Per-event results for the 'good' events only (those for which
    # remove_events_with_problems returns a dataframe)
    dict_of_things_im_storing = {
        'good_raw_events': [],
        'good_raw_events_seasons': [],
        'good_trimmed_events': [],
        'good_trimmed_events_durations': [],
        'good_normalised_events': [],
        'good_normalised_interpolated_events': [],
        'good_normalised_interpolated_events_max_quintiles': [],
    }

    for gauge_num in range(0,12):
        print(gauge_num)
        # Sorted names of all the event CSVs for this gauge
        files = np.sort(
            [f for f in os.listdir(f"../../../ProcessedData/IndependentEvents/UKCP18_30mins/{em}/{gauge_num}/") if f.endswith('.csv')]
        )

        # Go through each event
        for file in files:
            # Construct the path to the file
            fp = f"../../../ProcessedData/IndependentEvents/UKCP18_30mins/{em}/{gauge_num}/{file}"
            # Read in event
            this_event = read_event(gauge_num, fp)
            # Trim leading and trailing zeroes
            trimmed_event = remove_leading_and_trailing_zeroes(this_event)

            # Check trimmed event for 'problems', if there are none, return a dataframe, if there are, return None
            real_trimmed_event = remove_events_with_problems(trimmed_event, verbose=True)

            # Nothing is stored for events with problems
            if real_trimmed_event is None:
                continue

            # Create normalised version of event
            normalized_time, normalized_rainfall = create_normalised_event(real_trimmed_event)
            # Create interpolated, normalised version of event (with 12 data points)
            normalized_interpolated_rainfall = interpolate_and_bin(normalized_time, normalized_rainfall)
            # Find the fifth OF THE INTERPOLATED, NORMALISED EVENT with the maximum amount of rainfall
            max_quintile = find_fifth_with_most_rain(normalized_interpolated_rainfall)

            # Add to lists of events with no problems
            dict_of_things_im_storing['good_raw_events'].append(this_event)
            dict_of_things_im_storing['good_raw_events_seasons'].append(get_season(this_event['times'][0]))
            dict_of_things_im_storing['good_trimmed_events'].append(trimmed_event)
            # Number of rows in the trimmed event, divided by 2
            dict_of_things_im_storing['good_trimmed_events_durations'].append(len(trimmed_event)/2)
            dict_of_things_im_storing['good_normalised_events'].append(normalized_rainfall)
            dict_of_things_im_storing['good_normalised_interpolated_events'].append(normalized_interpolated_rainfall)
            dict_of_things_im_storing['good_normalised_interpolated_events_max_quintiles'].append(max_quintile)

# Saving is currently disabled: the lists above are built but not pickled.
#     for file_name,stored_list in dict_of_things_im_storing.items():
#         print(file_name)
#         cache_filepath = f"/nfs/a319/gy17m2a/PhD/ProcessedData/Profiles/UKCP18_30mins/{em}/{file_name}.pkl"
#         with open(cache_filepath, 'wb') as f:
#             pickle.dump(stored_list, f)

0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
1
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Too short to be an event
Too short to be an event
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
2
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Too short to be an event
Too short to be an event
Doesn't contain more than 1 value which isn't 0
Too short to be an event
Too short to be an event
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 value which isn't 0
Doesn't contain more than 1 