In [1]:
## Ran for everything now, excluding the gauges that we don't want

# Create a set of dimensionless profiles
Read in all of the events, for all durations, for all gauges, for all ensemble members.  
Convert them to dimensionless profiles, with 12 values between 0 and 1.  
Each value is a dimensionless, cumulative rainfall value (cumulative rainfall at this timestep, normalised by the total event rainfall):
- 0 means no rainfall has occurred, and 
- 1 means the total event rainfall has been reached.  

If there are fewer than 12 values, then these are filled in with interpolation

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
import pandas as pd
import re
from datetime import datetime

from Create_Profiles_Functions import *

# Maps the 1-based index of the wettest fifth of an event (the value returned by
# find_part_with_most_rain(..., 5)) to the label stored in the 'Loading_profile*' columns.
# NOTE(review): presumably F = front-loaded, C = centred, B = back-loaded — confirm.
quintile_mapping = {1: 'F2', 2: 'F1', 3: 'C', 4: 'B1', 5: 'B2'}

In [4]:
def find_part_with_most_rain(array, n, plot=False, ax=False):
    """Return the 1-based index of the segment of ``array`` with the largest total.

    ``array`` is cut into ``n`` consecutive segments with ``np.array_split`` (so the
    segment lengths may differ by one when ``len(array)`` is not a multiple of ``n``)
    and the values inside each segment are summed.

    Parameters
    ----------
    array : sequence of numbers or None
        Rainfall values in time order.
    n : int
        Number of segments to split the event into.
    plot : bool, optional
        When True, draw the series with each segment shaded and the winning segment
        highlighted.
    ax : matplotlib Axes, optional
        Axes to draw on when ``plot`` is True. If it is left at its default (or is
        None) a new figure and axes are created, instead of failing on ``False.plot``.

    Returns
    -------
    int or None
        Position (starting at 1) of the segment with the largest total; the earliest
        segment wins ties. None when ``array`` is None or when no segment has a total
        greater than zero.
    """
    if array is None:
        return None

    # Split the series into n consecutive, near-equal segments
    segments = np.array_split(array, n)

    max_array_rainfall = 0
    max_array_num = None
    total_precipitations = []  # total for each segment
    split_ranges = []          # (start, end) positions of each segment, end exclusive

    split_start = 0
    for position, segment in enumerate(segments, start=1):
        segment_total = segment.sum()
        split_end = split_start + len(segment)
        total_precipitations.append(segment_total)
        split_ranges.append((split_start, split_end))
        # Strict comparison: the first segment reaching the maximum is kept
        if segment_total > max_array_rainfall:
            max_array_num = position
            max_array_rainfall = segment_total
        split_start = split_end

    if plot:
        if ax is None or ax is False:
            # No axes supplied by the caller: draw on a fresh figure
            import matplotlib.pyplot as plt
            _, ax = plt.subplots()

        default_color = 'lightblue'   # colour for ordinary segments
        highlight_color = 'yellow'    # colour for the segment with the most rainfall

        # Plot the array
        ax.plot(range(1, len(array) + 1), array, label='Precipitation', marker='o')

        # Add vertical lines and shading for each split segment
        for i, (start_index, end_index) in enumerate(split_ranges):
            color = highlight_color if (i + 1) == max_array_num else default_color

            # Add vertical lines at the start and end of each split
            ax.axvline(x=start_index + 1, color=color, linestyle='--', label=f'Split {i+1} Start' if i == 0 or (i + 1) == max_array_num else "")
            ax.axvline(x=end_index, color=color, linestyle='--', label=f'Split {i+1} End' if i == 0 or (i + 1) == max_array_num else "")

            # Shade the region for the split
            ax.fill_between(range(start_index + 1, end_index + 1), array[start_index:end_index], color=color, alpha=0.3)

            # Write the segment total near the bottom of the shaded region
            ax.text((start_index + end_index) / 2+0.5, max(array) * 0.05,
                    f'{total_precipitations[i]:.2f}',
                    ha='center', va='center', fontsize=10, color='black', weight='bold', zorder=1)

        ax.set_title(f'Precipitation Values with Splits Marked. Max at {max_array_num}')
        ax.set_xlabel('Time')
        ax.set_ylabel('Precipitation')

    return max_array_num


def create_normalised_event(rainfall):
    """Scale an event's rainfall values by the event's largest value.

    Returns None when ``rainfall`` is None or empty. When the largest value is zero
    a message is printed and the input is handed back unchanged; otherwise the result
    is ``rainfall`` divided by its maximum.

    NOTE(review): the notebook introduction describes normalising by the *total*
    event rainfall, whereas this divides by the *peak* value — confirm which is meant.
    """
    # Nothing to scale
    if rainfall is None or len(rainfall) == 0:
        return None

    peak = np.max(rainfall)

    # A zero peak would mean dividing by zero, so give the caller its input back
    if peak == 0:
        print("Maximum rainfall is zero. Cannot normalize.")
        return rainfall

    return rainfall / peak

def create_cumulative_event(rainfall):
    """Return the running total of ``rainfall`` (``np.cumsum`` of the input)."""
    return np.cumsum(rainfall)

def interpolate_rainfall(rainfall, bin_number):
    """Resample ``rainfall`` onto ``bin_number`` evenly spaced points by linear interpolation.

    The input samples are placed at evenly spaced positions on a time axis running
    from 0 to 1, and the series is evaluated at ``np.linspace(0, 1, bin_number)``.
    The first and last output values therefore equal the first and last input values.

    ``np.interp`` is used so that the function only depends on NumPy, which this
    notebook imports explicitly (``interp1d`` is not imported here).

    Parameters
    ----------
    rainfall : sequence of numbers or None
        Values in time order.
    bin_number : int
        Number of points in the resampled series.

    Returns
    -------
    numpy.ndarray or None
        Array of length ``bin_number``; None when ``rainfall`` is None or has fewer
        than two values (no interval to interpolate over).
    """
    if rainfall is None or len(rainfall) < 2:
        return None

    values = np.asarray(rainfall, dtype=float)

    # Positions of the existing samples, rescaled so the event spans 0..1
    sample_positions = np.arange(len(values)) / (len(values) - 1)

    # Positions at which the resampled series is wanted
    target_points = np.linspace(0, 1, bin_number)

    return np.interp(target_points, sample_positions, values)


def create_incremental_event(cumulative_rainfall):
    """Convert a cumulative series back into per-step amounts.

    Each output value is the difference between consecutive cumulative values; the
    first output value is the first cumulative value itself (difference from an
    implied starting total of 0, supplied through ``prepend=0``).

    Parameters
    ----------
    cumulative_rainfall : sequence of numbers or None

    Returns
    -------
    numpy.ndarray or None
        Array with the same length as the input (empty for empty input); None when
        the input is None.
    """
    if cumulative_rainfall is None:
        return None

    # prepend=0 already makes element 0 equal to cumulative_rainfall[0], so no
    # separate fix-up of the first element is needed (and empty input is handled).
    return np.diff(cumulative_rainfall, prepend=0)

def extract_year(df):
    """Return the year of the first row's timestamp in ``df['times']``.

    Side effect: ``df['times']`` is converted to datetime in place on the frame that
    is passed in; values that cannot be parsed become NaT (``errors='coerce'``).

    The first row is selected by position, so the result does not depend on the
    frame's index containing the label 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'times' column.

    Returns
    -------
    Year of the first row (NaN if that timestamp could not be parsed).
    """
    # Ensure the 'times' column is in datetime format
    df['times'] = pd.to_datetime(df['times'], errors='coerce')
    # Positional access: first row regardless of index labels
    return df['times'].dt.year.iloc[0]

In [3]:
# NOTE(review): the profile-building loops in this notebook iterate over
# range(0, 1293) directly rather than using this range — confirm which upper
# bound is intended.
gauge_nums = range(0,1294)

# NIMROD - 30 mins
### Make profiles

In [None]:
# Build one row of profile information per independent NIMROD (30 min) event and
# pickle the resulting table, one file per NIMROD option.
#
# Rows are collected in a list and turned into a DataFrame once per option:
# DataFrame.append copies the whole frame on every call and no longer exists in
# pandas >= 2.0.
columns = [
    'gauge_num',  'season', 'precip', 'Volume','Year', 'times', 'duration',
    'normalized_rainfall', 'normalized_interpolated_rainfall_12', 'normalized_interpolated_rainfall_15',
    'max_quintile_profile_12', 'max_quintile_profile_15', 'max_quintile_normalised_rain', 'max_quintile_raw_rain']

excluded_gauges = {444, 827, 888}      # gauges deliberately left out of the analysis
precip_col = 'precipitation (mm/hr)'

for nimrod_option in ["NIMROD_2.2km_filtered_100"]:
    records = []

    for gauge_num in range(0, 1293):
        if gauge_num in excluded_gauges:
            continue

        print(f"gauge {gauge_num}")
        event_dir = f"/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents/NIMROD_30mins/{nimrod_option}/{gauge_num}/WholeYear"
        files = np.sort([f for f in os.listdir(event_dir) if f.endswith('.csv')])

        for file in files:
            fp = f"{event_dir}/{file}"
            if '2080' in fp:
                continue

            this_event = read_event(gauge_num, fp)
            trimmed_event = remove_leading_and_trailing_zeroes(this_event)
            real_trimmed_event, problem_events = remove_events_with_problems(trimmed_event, verbose=False)

            if real_trimmed_event is not None:

                precip = real_trimmed_event[precip_col]
                normalized_rainfall = create_normalised_event(precip)
                cumulative_normalized_rainfall = create_cumulative_event(normalized_rainfall)
                interpolated15_cumulative_normalized_rainfall = interpolate_rainfall(cumulative_normalized_rainfall, 15)
                interpolated12_cumulative_normalized_rainfall = interpolate_rainfall(cumulative_normalized_rainfall, 12)
                interpolated15_incremental_normalized_rainfall = create_incremental_event(interpolated15_cumulative_normalized_rainfall)
                interpolated12_incremental_normalized_rainfall = create_incremental_event(interpolated12_cumulative_normalized_rainfall)

                # Which fifth of the event holds the most rain, for each representation
                max_quintile_profile_12 = find_part_with_most_rain(interpolated12_incremental_normalized_rainfall, 5)
                max_quintile_profile_15 = find_part_with_most_rain(interpolated15_incremental_normalized_rainfall, 5)
                max_quintile_normalised_rain = find_part_with_most_rain(normalized_rainfall, 5)
                max_quintile_raw_rain = find_part_with_most_rain(precip, 5)

                # presumably hours, given 30-minute timesteps — TODO confirm
                duration = len(real_trimmed_event) / 2
                times = trimmed_event['times']
                season = get_season(trimmed_event['times'][0])
                year = extract_year(trimmed_event)

            else:
                # Event rejected by remove_events_with_problems: keep its precipitation
                # but blank every derived field. The interpolated arrays must be reset
                # here too, otherwise the row would silently reuse the values computed
                # for the previous event (or fail if this is the first event).
                precip = trimmed_event[precip_col]
                normalized_rainfall = None
                interpolated12_incremental_normalized_rainfall = None
                interpolated15_cumulative_normalized_rainfall = None
                max_quintile_profile_12 = None
                max_quintile_profile_15 = None
                max_quintile_normalised_rain = None
                max_quintile_raw_rain = None
                duration = None
                season = None
                year = None
                times = None

            records.append({
                'gauge_num': gauge_num,
                'season': season,
                'precip': precip.values,
                'Volume': sum(precip),
                'Year': year,
                'times': times,
                'duration': duration,
                'normalized_rainfall': normalized_rainfall,
                'normalized_interpolated_rainfall_12': interpolated12_incremental_normalized_rainfall,
                # NOTE(review): the 15-point column stores the *cumulative* profile while
                # the 12-point column stores the *incremental* one — confirm this is intended.
                'normalized_interpolated_rainfall_15': interpolated15_cumulative_normalized_rainfall,
                'max_quintile_profile_12': max_quintile_profile_12,
                'max_quintile_profile_15': max_quintile_profile_15,
                'max_quintile_normalised_rain': max_quintile_normalised_rain,
                'max_quintile_raw_rain': max_quintile_raw_rain
            })

    df = pd.DataFrame(records, columns=columns)

    # Translate the wettest-fifth index into a loading-profile label
    df['Loading_profile12'] = df['max_quintile_profile_12'].map(quintile_mapping)
    df['Loading_profile15'] = df['max_quintile_profile_15'].map(quintile_mapping)
    df['Loading_profile_normalised_rain'] = df['max_quintile_normalised_rain'].map(quintile_mapping)
    df['Loading_profile_raw_rain'] = df['max_quintile_raw_rain'].map(quintile_mapping)

    # Saved inside the loop so each option gets its own file
    with open(f"/nfs/a319/gy17m2a/PhD/ProcessedData/Profiles/NIMROD_30mins/WholeYear/{nimrod_option}_profiles_df.pkl", 'wb') as file:
        pickle.dump(df, file)

gauge 0
gauge 1
gauge 2
gauge 3
gauge 4


# NIMROD 5 mins

# UKCP18
### Make and pickle profiles

In [None]:
# Build one row of profile information per independent UKCP18 (30 min) event and
# pickle the resulting table, one file per ensemble member.
#
# Rows are collected in a list and turned into a DataFrame once per member:
# DataFrame.append copies the whole frame on every call and no longer exists in
# pandas >= 2.0.
columns = [
    'gauge_num',  'season', 'precip', 'Volume','Year', 'times', 'duration',
    'normalized_rainfall', 'normalized_interpolated_rainfall_12', 'normalized_interpolated_rainfall_15',
    'max_quintile_profile_12', 'max_quintile_profile_15', 'max_quintile_normalised_rain', 'max_quintile_raw_rain']

excluded_gauges = {444, 827, 888}      # gauges deliberately left out of the analysis
precip_col = 'precipitation (mm/hr)'

for em in ["bb198"]:
    records = []

    for gauge_num in range(0, 1293):
        if gauge_num in excluded_gauges:
            continue

        print(f"gauge {gauge_num}")
        event_dir = f"/nfs/a161/gy17m2a/PhD/ProcessedData/IndependentEvents/UKCP18_30mins/{em}/{gauge_num}/WholeYear"
        files = np.sort([f for f in os.listdir(event_dir) if f.endswith('.csv')])

        for file in files:
            fp = f"{event_dir}/{file}"
            if '2080' in fp:
                continue

            this_event = read_event(gauge_num, fp)
            trimmed_event = remove_leading_and_trailing_zeroes(this_event)
            real_trimmed_event, problem_events = remove_events_with_problems(trimmed_event, verbose=False)

            if real_trimmed_event is not None:

                precip = real_trimmed_event[precip_col]
                normalized_rainfall = create_normalised_event(precip)
                cumulative_normalized_rainfall = create_cumulative_event(normalized_rainfall)
                interpolated15_cumulative_normalized_rainfall = interpolate_rainfall(cumulative_normalized_rainfall, 15)
                interpolated12_cumulative_normalized_rainfall = interpolate_rainfall(cumulative_normalized_rainfall, 12)
                interpolated15_incremental_normalized_rainfall = create_incremental_event(interpolated15_cumulative_normalized_rainfall)
                interpolated12_incremental_normalized_rainfall = create_incremental_event(interpolated12_cumulative_normalized_rainfall)

                # Which fifth of the event holds the most rain, for each representation
                max_quintile_profile_12 = find_part_with_most_rain(interpolated12_incremental_normalized_rainfall, 5)
                max_quintile_profile_15 = find_part_with_most_rain(interpolated15_incremental_normalized_rainfall, 5)
                max_quintile_normalised_rain = find_part_with_most_rain(normalized_rainfall, 5)
                max_quintile_raw_rain = find_part_with_most_rain(precip, 5)

                # presumably hours, given 30-minute timesteps — TODO confirm
                duration = len(real_trimmed_event) / 2
                times = trimmed_event['times']
                season = get_season(trimmed_event['times'][0])
                year = extract_year(trimmed_event)

            else:
                # Event rejected by remove_events_with_problems: keep its precipitation
                # but blank every derived field. The interpolated arrays must be reset
                # here too, otherwise the row would silently reuse the values computed
                # for the previous event (or fail if this is the first event).
                precip = trimmed_event[precip_col]
                normalized_rainfall = None
                interpolated12_incremental_normalized_rainfall = None
                interpolated15_cumulative_normalized_rainfall = None
                max_quintile_profile_12 = None
                max_quintile_profile_15 = None
                max_quintile_normalised_rain = None
                max_quintile_raw_rain = None
                duration = None
                season = None
                year = None
                times = None

            records.append({
                'gauge_num': gauge_num,
                'season': season,
                'precip': precip.values,
                'Volume': sum(precip),
                'Year': year,
                'times': times,
                'duration': duration,
                'normalized_rainfall': normalized_rainfall,
                'normalized_interpolated_rainfall_12': interpolated12_incremental_normalized_rainfall,
                # NOTE(review): the 15-point column stores the *cumulative* profile while
                # the 12-point column stores the *incremental* one — confirm this is intended.
                'normalized_interpolated_rainfall_15': interpolated15_cumulative_normalized_rainfall,
                'max_quintile_profile_12': max_quintile_profile_12,
                'max_quintile_profile_15': max_quintile_profile_15,
                'max_quintile_normalised_rain': max_quintile_normalised_rain,
                'max_quintile_raw_rain': max_quintile_raw_rain
            })

    df = pd.DataFrame(records, columns=columns)

    # Translate the wettest-fifth index into a loading-profile label
    df['Loading_profile12'] = df['max_quintile_profile_12'].map(quintile_mapping)
    df['Loading_profile15'] = df['max_quintile_profile_15'].map(quintile_mapping)
    df['Loading_profile_normalised_rain'] = df['max_quintile_normalised_rain'].map(quintile_mapping)
    df['Loading_profile_raw_rain'] = df['max_quintile_raw_rain'].map(quintile_mapping)

    # Saved inside the loop so each ensemble member gets its own file
    with open(f"/nfs/a319/gy17m2a/PhD/ProcessedData/Profiles/UKCP18_30mins/{em}/profiles_df.pkl", 'wb') as file:
        pickle.dump(df, file)


gauge 0
gauge 1
gauge 2
gauge 3
gauge 4
gauge 5
gauge 6
gauge 7
gauge 8
gauge 9
gauge 10
Some dates were invalid and have been coerced to NaT:
gauge 11
gauge 12
gauge 13
Some dates were invalid and have been coerced to NaT:
Some dates were invalid and have been coerced to NaT:
gauge 14
gauge 15
gauge 16
gauge 17
gauge 18
gauge 19
gauge 20
gauge 21
gauge 22
gauge 23
gauge 24
gauge 25
Some dates were invalid and have been coerced to NaT:
gauge 26
gauge 27
gauge 28
Some dates were invalid and have been coerced to NaT:
gauge 29
Some dates were invalid and have been coerced to NaT:
gauge 30
gauge 31
gauge 32
gauge 33
Some dates were invalid and have been coerced to NaT:
gauge 34
gauge 35
gauge 36
gauge 37
Some dates were invalid and have been coerced to NaT:
gauge 38
gauge 39
gauge 40
gauge 41
gauge 42
gauge 43
gauge 44
gauge 45
gauge 46
gauge 47
gauge 48
gauge 49
gauge 50
gauge 51
gauge 52
gauge 53
gauge 54
gauge 55
gauge 56
gauge 57
Some dates were invalid and have been coerced to NaT:
ga