In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle

def is_multiple_of_5(lst):
    """Return True when the number of items in ``lst`` is divisible by 5.

    A length of zero counts as a multiple of 5.
    """
    leftover = len(lst) % 5
    return leftover == 0

In [3]:
def find_part_with_most_rain_using_cumulative_rainfall(ax, array, n, plot=False, prepend_zero=False):
    """Find which of ``n`` consecutive parts of an event holds the most rainfall.

    The cumulative series is converted to per-step increments with ``np.diff``,
    the increments are cut into ``n`` consecutive parts with ``np.array_split``
    and the part totals are compared.

    Parameters
    ----------
    ax : object with the matplotlib Axes plotting methods, or None
        Target axes; only touched when ``plot`` is true.
    array : array-like
        Cumulative rainfall values for one event.
    n : int
        Number of parts to split the increments into.
    plot : bool, default False
        When true, draw the increments, the split boundaries, the shaded
        parts and each part's total on ``ax``.
    prepend_zero : bool, default False
        ``np.diff`` returns one value fewer than its input, so by default the
        first cumulative value never contributes an increment. Pass True to
        difference against a leading zero and keep it (use this when the
        series does not already start with an explicit zero, e.g. when it was
        produced by ``np.cumsum``). The default keeps the historical behaviour.

    Returns
    -------
    int or None
        1-based number of the part with the largest total. Ties go to the
        earliest part. None when no part has a total greater than zero.
    """
    if prepend_zero:
        array_diff = np.diff(array, prepend=0)
    else:
        array_diff = np.diff(array)

    # Split the increments into n consecutive parts
    splits = np.array_split(array_diff, n)

    max_array_rainfall = 0
    max_array_num = None

    total_precipitations = []  # Total precipitation of each part
    split_ranges = []  # Half-open (start, end) index range of each part

    split_start = 0
    for split_number, split in enumerate(splits, start=1):
        total_precipitation = split.sum()
        total_precipitations.append(total_precipitation)
        split_end = split_start + len(split)
        split_ranges.append((split_start, split_end))
        # Strict '>' keeps the earliest part on ties and leaves None if nothing exceeds zero
        if total_precipitation > max_array_rainfall:
            max_array_num = split_number
            max_array_rainfall = total_precipitation
        split_start = split_end

    if plot:
        default_color = 'lightblue'  # Colour of ordinary parts
        highlight_color = 'yellow'  # Colour of the part with the most rainfall

        # x runs from 1 so that the i-th increment is drawn at x = i
        ax.plot(range(1, len(array_diff) + 1), array_diff, label='Precipitation', marker='o')

        # Totals are written at 5% of the largest increment; an empty series has no maximum
        label_height = max(array_diff) * 0.05 if len(array_diff) else 0

        for i, (start_index, end_index) in enumerate(split_ranges):
            is_wettest = (i + 1) == max_array_num
            color = highlight_color if is_wettest else default_color

            # Only the first part and the wettest part get legend labels
            labelled = i == 0 or is_wettest
            ax.axvline(x=start_index + 1, color=color, linestyle='--',
                       label=f'Split {i+1} Start' if labelled else "")
            ax.axvline(x=end_index, color=color, linestyle='--',
                       label=f'Split {i+1} End' if labelled else "")

            # Shade the region covered by this part
            ax.fill_between(range(start_index + 1, end_index + 1),
                            array_diff[start_index:end_index], color=color, alpha=0.3)

            # Write the part's total, centred horizontally on the part
            ax.text((start_index + end_index) / 2 + 0.5, label_height,
                    f'{total_precipitations[i]:.2f}',
                    ha='center', va='center', fontsize=10, color='black', weight='bold', zorder=1)

        ax.set_title(f'Precipitation Values with Splits Marked. Max at {max_array_num}')
        ax.set_xlabel('Time')
        ax.set_ylabel('Precipitation')

    return max_array_num

def find_part_with_most_rain(ax, array, n, plot=False):
    """Find which of ``n`` consecutive parts of an event holds the most rainfall.

    ``array`` holds per-step (non-cumulative) precipitation. It is cut into
    ``n`` consecutive parts with ``np.array_split`` and the part totals are
    compared.

    Parameters
    ----------
    ax : object with the matplotlib Axes plotting methods, or None
        Target axes; only touched when ``plot`` is true.
    array : array-like
        Precipitation values for one event.
    n : int
        Number of parts to split the values into.
    plot : bool, default False
        When true, draw the values, the split boundaries, the shaded parts
        and each part's total on ``ax``.

    Returns
    -------
    int or None
        1-based number of the part with the largest total. Ties go to the
        earliest part. None when no part has a total greater than zero.
        In a notebook, end the call with ``;`` if the number should not be
        echoed as the cell's output.
    """
    values = np.asarray(array)

    # Split the values into n consecutive parts
    splits = np.array_split(values, n)

    max_array_rainfall = 0
    max_array_num = None

    total_precipitations = []  # Total precipitation of each part
    split_ranges = []  # Half-open (start, end) index range of each part

    split_start = 0
    for split_number, split in enumerate(splits, start=1):
        total_precipitation = split.sum()
        total_precipitations.append(total_precipitation)
        split_end = split_start + len(split)
        split_ranges.append((split_start, split_end))
        # Strict '>' keeps the earliest part on ties and leaves None if nothing exceeds zero
        if total_precipitation > max_array_rainfall:
            max_array_num = split_number
            max_array_rainfall = total_precipitation
        split_start = split_end

    if plot:
        default_color = 'lightblue'  # Colour of ordinary parts
        highlight_color = 'yellow'  # Colour of the part with the most rainfall

        # x runs from 1 so that the i-th value is drawn at x = i
        ax.plot(range(1, len(values) + 1), values, label='Precipitation', marker='o')

        # Totals are written at 5% of the largest value; an empty series has no maximum
        label_height = max(values) * 0.05 if len(values) else 0

        for i, (start_index, end_index) in enumerate(split_ranges):
            is_wettest = (i + 1) == max_array_num
            color = highlight_color if is_wettest else default_color

            # Only the first part and the wettest part get legend labels
            labelled = i == 0 or is_wettest
            ax.axvline(x=start_index + 1, color=color, linestyle='--',
                       label=f'Split {i+1} Start' if labelled else "")
            ax.axvline(x=end_index, color=color, linestyle='--',
                       label=f'Split {i+1} End' if labelled else "")

            # Shade the region covered by this part
            ax.fill_between(range(start_index + 1, end_index + 1),
                            values[start_index:end_index], color=color, alpha=0.3)

            # Write the part's total, centred horizontally on the part
            ax.text((start_index + end_index) / 2 + 0.5, label_height,
                    f'{total_precipitations[i]:.2f}',
                    ha='center', va='center', fontsize=10, color='black', weight='bold', zorder=1)

        ax.set_title(f'Precipitation Values with Splits Marked. Max at {max_array_num}')
        ax.set_xlabel('Time')
        ax.set_ylabel('Precipitation')

    # Return the result, matching find_part_with_most_rain_using_cumulative_rainfall
    return max_array_num

### Read in some data

In [4]:
# NOTE(review): absolute, machine-specific path -- the captured run of this cell failed with
# FileNotFoundError; consider building the path from a configurable data directory.
# SECURITY: pickle.load can execute arbitrary code; only load this file from a trusted source.
with open("/nfs/a319/gy17m2a/PhD/ProcessedData/Profiles/UKCP18_30mins/bc005/profiles_df.pkl", 'rb') as f:
    ukcp18_present = pickle.load(f)

FileNotFoundError: [Errno 2] No such file or directory: '/nfs/a319/gy17m2a/PhD/ProcessedData/Profiles/UKCP18_30mins/bc005/profiles_df.pkl'

In [None]:
# Percentage share of each distinct value in the 'Loading_profile_normalised_rain' column
unique_value_counts = ukcp18_present['Loading_profile_normalised_rain'].value_counts()
total_count = unique_value_counts.sum()  # total number of counted values

# Counts -> percentages, rounded to one decimal place
unique_value_percentages = ((unique_value_counts / total_count) * 100).round(1)

print(unique_value_percentages)

In [None]:
# Percentage share of each distinct value in the 'Loading_profile12' column
unique_value_counts = ukcp18_present['Loading_profile12'].value_counts()
total_count = unique_value_counts.sum()  # total number of counted values

# Counts -> percentages, rounded to one decimal place
unique_value_percentages = ((unique_value_counts / total_count) * 100).round(1)

print(unique_value_percentages)

In [None]:
# Draw the split plots for one event on a 2x2 grid: three columns go through the
# cumulative variant, the 'precip' column through the plain variant.
row_num = 125249  # other row numbers left by the author: 47677, 1014
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(18, 8))
find_part_with_most_rain_using_cumulative_rainfall(
    axs[0, 0], ukcp18_present['normalized_interpolated_rainfall_15'][row_num], n=5, plot=True)
find_part_with_most_rain_using_cumulative_rainfall(
    axs[0, 1], ukcp18_present['normalized_interpolated_rainfall_12'][row_num], n=5, plot=True)
find_part_with_most_rain_using_cumulative_rainfall(
    axs[1, 0], ukcp18_present['normalized_rainfall'][row_num], n=5, plot=True)
# Trailing ';' keeps the notebook from echoing the last call's result
find_part_with_most_rain(
    axs[1, 1], ukcp18_present['precip'][row_num], n=5, plot=True);

In [None]:
# Same 2x2 comparison as the previous cell, for a different event.
row_num = 125220  # other row numbers left by the author: 125249, 47677, 1014
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(18, 8))
find_part_with_most_rain_using_cumulative_rainfall(
    axs[0, 0], ukcp18_present['normalized_interpolated_rainfall_15'][row_num], n=5, plot=True)
find_part_with_most_rain_using_cumulative_rainfall(
    axs[0, 1], ukcp18_present['normalized_interpolated_rainfall_12'][row_num], n=5, plot=True)
find_part_with_most_rain_using_cumulative_rainfall(
    axs[1, 0], ukcp18_present['normalized_rainfall'][row_num], n=5, plot=True)
# Trailing ';' keeps the notebook from echoing the last call's result
find_part_with_most_rain(
    axs[1, 1], ukcp18_present['precip'][row_num], n=5, plot=True);

# Find rows with different answers for raw rain and normalised rain

In [None]:
# Keep only the rows whose raw-rain and normalised-rain loading profiles are not equal
profiles_agree = ukcp18_present['Loading_profile_raw_rain'] == ukcp18_present['Loading_profile_normalised_rain']
test = ukcp18_present.loc[~profiles_agree]
test

## Hmm

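These plots all show non-cumulative rainfall, but in all of the normalised/interpolated ones the first (low) value has been lost. This is probably because `np.diff` on a cumulative series returns one value fewer than its input, so the first increment is dropped.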

In [None]:
# NOTE(review): `test_div5` is not defined in any cell visible in this notebook, so unless
# it comes from a cell that is not shown this raises NameError on a fresh kernel.
# Presumably it was `test` restricted with is_multiple_of_5 on the 'precip' lengths --
# TODO confirm and add the defining cell.
row_num =8 #125249 #47677 #1014
fig,axs=plt.subplots(ncols=2,nrows=2, figsize=(18,8))
# Three columns go through the cumulative variant, 'precip' through the plain variant
find_part_with_most_rain_using_cumulative_rainfall(axs[0,0], test_div5['normalized_interpolated_rainfall_15'][row_num], 5, True)
find_part_with_most_rain_using_cumulative_rainfall(axs[0,1], test_div5['normalized_interpolated_rainfall_12'][row_num], 5, True)
find_part_with_most_rain_using_cumulative_rainfall(axs[1,0], test_div5['normalized_rainfall'][row_num], 5, True)
find_part_with_most_rain(axs[1,1], test_div5['precip'][row_num], 5, True)

### Raw rainfall has 5 values, the second value is the biggest, so this should definitely be F1

In [None]:
def create_normalised_cumulative_event(rainfall, verbose=True):
    """Scale a cumulative rainfall series by its final value.

    This is the variant that used to be defined first under the name
    ``create_normalised_event`` and was silently shadowed by the max-based
    definition below; it is kept under its own name so both stay callable.

    Parameters
    ----------
    rainfall : array-like
        Cumulative rainfall values.
    verbose : bool, default True
        Print the input and the normalised output (debug aid).

    Returns
    -------
    numpy.ndarray
        ``rainfall`` divided by its last value. If ``rainfall`` is empty or
        its last value is zero, a message is printed and the input object is
        returned unchanged.
    """
    if len(rainfall) == 0 or rainfall[-1] == 0:
        print("Empty array or total rainfall is zero. Cannot normalize.")
        return rainfall  # Nothing sensible to divide by

    # asarray lets plain lists be divided as well as arrays
    values = np.asarray(rainfall)
    normalized_rainfall = values / values[-1]

    if verbose:
        print(f"Original rainfall: {rainfall}")
        print(f"Normalized rainfall: {normalized_rainfall}")

    return normalized_rainfall


def create_normalised_event(rainfall, verbose=True):
    """Scale a rainfall series by its maximum value.

    Parameters
    ----------
    rainfall : array-like
        Rainfall values.
    verbose : bool, default True
        Print the input and the normalised output (debug aid).

    Returns
    -------
    numpy.ndarray
        ``rainfall`` divided by its maximum. If ``rainfall`` is empty or its
        maximum is zero, a message is printed and the input object is
        returned unchanged.
    """
    if len(rainfall) == 0 or np.max(rainfall) == 0:
        print("Empty array or maximum rainfall is zero. Cannot normalize.")
        return rainfall  # Nothing sensible to divide by

    # asarray lets plain lists be divided as well as arrays
    values = np.asarray(rainfall)
    normalized_rainfall = values / np.max(values)

    if verbose:
        print(f"Original rainfall: {rainfall}")
        print(f"Normalized rainfall: {normalized_rainfall}")

    return normalized_rainfall

def create_cumulative_event(rainfall):
    """Return the running total of ``rainfall`` as computed by ``np.cumsum``."""
    return np.cumsum(rainfall)

def interpolate_rainfall(rainfall, bin_number):
    """Linearly resample a 1-D rainfall series onto ``bin_number`` evenly spaced points.

    The samples are placed at evenly spaced positions on a time axis running
    from 0 to 1, and the series is evaluated at ``bin_number`` evenly spaced
    positions on that same axis (both end points included).

    Parameters
    ----------
    rainfall : array-like, 1-D
        Values to resample.
    bin_number : int
        Number of output points.

    Returns
    -------
    numpy.ndarray
        ``bin_number`` interpolated values. A single-value input yields that
        value repeated, since there is no time span to interpolate over.

    Raises
    ------
    ValueError
        If ``rainfall`` is empty.
    """
    values = np.asarray(rainfall, dtype=float)
    if values.size == 0:
        raise ValueError("Cannot interpolate an empty rainfall series.")
    if values.size == 1:
        # A single sample would make the time normalisation below divide 0 by 0
        return np.full(bin_number, values[0])

    # Positions at which the resampled series is evaluated
    target_points = np.linspace(0, 1, bin_number)

    # Positions of the existing samples, normalised to run from 0 to 1
    normalized_time = np.arange(values.size) / (values.size - 1)

    # Targets never leave [0, 1], so no extrapolation is needed and np.interp suffices
    return np.interp(target_points, normalized_time, values)


def create_incremental_event(cumulative_rainfall):
    """Convert a cumulative rainfall series back into per-step increments.

    Differencing against a prepended zero keeps the output the same length as
    the input and makes the first increment equal to the first cumulative
    value, so no separate assignment of element 0 is needed. An empty input
    gives an empty result.

    Parameters
    ----------
    cumulative_rainfall : array-like, 1-D
        Cumulative rainfall values.

    Returns
    -------
    numpy.ndarray
        The increments, one per input value.
    """
    return np.diff(cumulative_rainfall, prepend=0)
    

from scipy.interpolate import interp1d
# Rebuild the processing chain by hand for one row of `test`
raw_rainfall = test['precip'][125234]
# create_normalised_event as last defined in this cell divides by the maximum value
normalised_rainfall = create_normalised_event(raw_rainfall)
# Running total of the normalised values
cumulative_normalised_rainfall = create_cumulative_event(normalised_rainfall)
# Resample the cumulative series onto 15 and onto 12 evenly spaced points
interpolated15_cumulative_normalised_rainfall = interpolate_rainfall(cumulative_normalised_rainfall,15)
interpolated12_cumulative_normalised_rainfall = interpolate_rainfall(cumulative_normalised_rainfall,12)
# Turn each resampled cumulative series back into per-step increments
interpolated15_incremental_normalised_rainfall = create_incremental_event(interpolated15_cumulative_normalised_rainfall)
interpolated12_incremental_normalised_rainfall = create_incremental_event(interpolated12_cumulative_normalised_rainfall)

# Increments of the un-interpolated cumulative series
incremental_normalised_rainfall = create_incremental_event(cumulative_normalised_rainfall)

In [None]:
# Display the increments derived from the 12-point interpolated cumulative series
interpolated12_incremental_normalised_rainfall

In [None]:
fig,axs=plt.subplots(ncols=2,nrows=2, figsize=(18,8))
# Split plots for the raw, normalised, 12-point and 15-point incremental series
find_part_with_most_rain(axs[0, 0], raw_rainfall, n=5, plot=True)
find_part_with_most_rain(axs[0, 1], normalised_rainfall, n=5, plot=True)
find_part_with_most_rain(
    axs[1, 0], interpolated12_incremental_normalised_rainfall, n=5, plot=True)
# Trailing ';' keeps the notebook from echoing the last call's result
find_part_with_most_rain(
    axs[1, 1], interpolated15_incremental_normalised_rainfall, n=5, plot=True);


In [None]:
### Why are we calculating cumulative rainfall?
### Why are we normalising rainfall 0-1?
# Can we interpolate rainfall without doing these things?