In [1]:
import numpy as np
import iris
from scipy.stats import expon, kstest
import matplotlib.pyplot as plt
import glob
import iris.plot as iplt

In [2]:
def estimate_tb0(rainfall_data, threshold, max_tb0_iterations=100, tolerance=0.01):
    # Identify rainy and dry periods
    rainy_periods = identify_rainy_periods(rainfall_data, threshold)
    
    # Initialize parameters
    tb0 = None
    iteration = 0
    
    # Iterate to estimate tb0
    while iteration < max_tb0_iterations:
        # Calculate inter-event times assuming tb0
        inter_event_times = calculate_inter_event_times(rainy_periods, tb0)
        
        # Test if inter-event times follow exponential distribution
        if is_exponential_distribution(inter_event_times, tolerance):
            return tb0
        
        # Estimate new tb0 based on the mean inter-event time
        tb0 = np.mean(inter_event_times)
        iteration += 1
    
    # If max iterations reached without convergence, return None
    return None


def calculate_inter_event_times(rainy_periods):
    """
    Calculate inter-event times between rainy periods.

    Parameters:
    - rainy_periods: List of tuples representing start and end times of rainy periods.

    Returns:
    - inter_event_times: List of inter-event times.
    """
    inter_event_times = []
    end_time_prev = 0

    # Calculate inter-event times based on the start and end times of rainy periods
    # time periods are one hour
    for start_time, end_time in rainy_periods:
        # Find the time between the start time of a rainy event, and the end time of the previous event
        inter_event_time = start_time - end_time_prev
        # Add it to a list of inter_event_times
        inter_event_times.append(inter_event_time)
        # Update the end_time_prev parameter to be the end time of this event
        end_time_prev = end_time

    return inter_event_times

def is_exponential_distribution(data, tolerance):
    # Fit data to exponential distribution
    _, p_value = expon.fit(data)
    print( p_value > tolerance)
    # Test if p-value of fit is within tolerance
    return p_value > tolerance

def is_exponential_distribution2(data, alpha=0.02):
    # Fit an exponential distribution to the data
    scale = 1 / np.mean(data)
    exponential_dist = expon(scale=scale)
    
    # Perform the Kolmogorov-Smirnov test
    _, p_value = kstest(data, exponential_dist.cdf)
    
    # Compare the p-value to the significance level alpha
    print( p_value > alpha)
#     return p_value > alpha

def another_exponential_test(inter_event_times):
    # inter_event_times = np.diff(np.where(np.diff(np.concatenate(([True], dry_spell_durations > 0, [True]))) == 1)[0])
    inter_event_times = np.array(inter_event_times)
    # Fit exponential distribution to inter-event times
    params = expon.fit(inter_event_times)
    lambda_fit = params[1]  # Estimated lambda parameter of the exponential distribution

    # Perform Kolmogorov-Smirnov test
    ks_statistic, p_value = kstest(inter_event_times, 'expon', args=params)

    # Generate x values for the fitted exponential distribution
    x_values = np.linspace(0, inter_event_times.max(), 100)
    # Calculate corresponding y values using the fitted distribution parameters
    y_values = expon.pdf(x_values, *params)

    # Plot histogram of inter-event times
    plt.hist(inter_event_times, bins=10, density=True, alpha=0.7, color='blue', label='Inter-event times histogram')

    # Plot fitted exponential distribution
    plt.plot(x_values, y_values, color='red', label='Fitted exponential distribution')

    plt.xlabel('Inter-event Time (days)')
    plt.ylabel('Probability Density')
    plt.title('Inter-event Time Distribution of Dry Spells')
    plt.legend()
    plt.grid(True)
    plt.show()

    print("Estimated lambda parameter of the exponential distribution:", lambda_fit)
    print("Kolmogorov-Smirnov test statistic:", ks_statistic)
    print("P-value:", p_value)

# Function to plot inter-event times
def plot_inter_event_times(inter_event_times):
    """
    Plot the distribution of inter-event times.

    Parameters:
    - inter_event_times: List or array containing inter-event times.
    """
    # Plot histogram of inter-event times
    plt.figure(figsize=(8, 6))
    plt.hist(inter_event_times, bins=30, density=True, color='skyblue', edgecolor='black', alpha=0.7)
    plt.title('Distribution of Inter-Event Times')
    plt.xlabel('Inter-Event Time')
    plt.ylabel('Density')
    plt.grid(True)
    plt.show()
    
def identify_rainy_periods(rainfall_data, is_raining_threshold,dry_spell_threshold):
    rainy_periods = []  # Initialize list to store rainy periods
    within_rainfall_period = False  # Flag to track if it's raining
    it_rained_last_time = False  # Flag to track if it's raining
    start_time = None   # Variable to store start time of rainy period
    dry_spell_count = 0  # Counter for consecutive timesteps without rain
    
    # Iterate through each timestep's rainfall value
    for time, precipitation in enumerate(rainfall_data):
        # print(time, precipitation)

        # If precipitation exceeds threshold....
        if precipitation >= is_raining_threshold:
            # print("It's raining")
            # If it's not already raining and precipitation exceeds threshold, start a new rainy period
            if within_rainfall_period == False:
                start_time = time  # Record start time of rainy period
                within_rainfall_period = True  # Set flag to indicate it's raining
                it_rained_last_time = True
                dry_spell_count = 0  # Reset dry spell counter
            if within_rainfall_period == True and it_rained_last_time == False:
                dry_spell_count = 0 
                
        # If it isn't raining now, but we're currently within a rainfall event
        elif within_rainfall_period == True:
            
            #print("its not raining, but we are still within a rainfall event")
            it_rained_last_time = False
#             if it_rained_last_time == True and dry_spell_count == 1:
#                 dry_spell_count = 0
            
            # Increment dry spell counter
            # But only if 
            dry_spell_count += 1
            # print(f"the dry spell counter is on {dry_spell_count}")
            
            # Check if dry spell threshold reached
            if dry_spell_count >= dry_spell_threshold:
                # print("we can no longer consider this to be part of the rainy period")
                # End current rainy period and record its start and end times
                # print(start_time, time - 1)
                rainy_periods.append((start_time, time - 1))
                within_rainfall_period = False  # Reset flag to indicate it's not raining
            
        # If it isn't raining now, and we're not currently within a rainfall event
        else:
            # print ("its not raining now and we aren't within a rainfall event ")
            pass  # No action needed, continue iterating

    # Check if last period was rainy
    if within_rainfall_period == True:
        # If raining at the end of the data, consider it as the end time of the last rainy period
        rainy_periods.append((start_time, len(rainfall_data) - 1))
    
    return rainy_periods

## Get one year of UKCP18 30 mins data as a timeseries 

In [3]:
em = 'bc005'
datadir = f'/nfs/a319/gy17m2a/PhD/ProcessedData/TimeSeries/UKCP18_every30mins/2.2km/2002_2020/{em}/'

filenames =[]
for yr in range(2001,2020):
    file_name = datadir + f"{yr}_maskedcube.nc"
    filenames.append(file_name)
monthly_cubes_list = iris.load(filenames)
model_cube = monthly_cubes_list.concatenate_cube()   

### Select one cell

In [4]:
rainfall_data = model_cube[:,200,300].data
len(rainfall_data)

82080

### Check location of the cell

In [263]:
# model_cube_copy = model_cube.copy()
# model_cube_copy.data[:] = 0
# model_cube_copy.data[:,250,200] = 500
# model_cube_copy.data[:,200,300] =500

# # Check the plotting
# fig, ax = plt.subplots(figsize=(20,10))
# iplt.contourf(model_cube_copy[10])
# plt.gca().coastlines(resolution='10m', color='black', linewidth=0.5);

### Threshold

In [113]:
its_raining_thresh = 0.1

In [18]:
def calculate_cv(dry_periods):
    # Calculate mean of dry periods
    mean_dry_period = np.mean(dry_periods)
    
    # Calculate variance of dry periods
    variance_dry_period = np.var(dry_periods)
    
    # Number of dry periods
    n = len(dry_periods)
    
    # Calculate CV using the formula
    cv = np.sqrt((n / (n - 1)) * (variance_dry_period / mean_dry_period**2))
    return cv


def calculate_cv_molly(dry_periods):
    # Calculate mean of dry periods
    mean_dry_period = np.mean(dry_periods)
    
    # Calculate variance of dry periods
    variance_dry_period = np.var(dry_periods)
    
    # Number of dry periods
    n = len(dry_periods)
    
    # Calculate CV using the formula
    cv = np.sqrt( (variance_dry_period - (mean_dry_period**2)) *(n/(n-1)) )/mean_dry_period
    return cv

### Detect Rainy Periods: 
Iterate through the rainfall data, identifying consecutive time intervals (e.g., hours, days) where the precipitation exceeds the threshold.  
Record the start and end times of each rainy period.

In [7]:
# rainfall_data2 = np.array([0,0.1,0.2, 0.3, 0, 0.1, 0.2, 0.3])

In [11]:
tbo = 1

In [14]:
rainy_periods = identify_rainy_periods(rainfall_data, is_raining_threshold = 0.0001, dry_spell_threshold= 0)
inter_event_times = calculate_inter_event_times(rainy_periods)
filtered_inter_event_times = [time for time in inter_event_times if time >= tbo]
# plot_inter_event_times(filtered_inter_event_times)
# np.unique(filtered_inter_event_times, return_counts=True)

In [21]:
mean_time_diff = np.mean(inter_event_times)
std_time_diff = np.std(inter_event_times)

# Calculate the Coefficient of Variation (CV)
CV = (std_time_diff / mean_time_diff) * 100
CV

196.15375215878555

In [19]:
optimal_tb0 = calculate_cv(inter_event_times)
optimal_tb0

1.9618323787893084

In [20]:
optimal_tb0 = calculate_cv_molly(inter_event_times)
optimal_tb0

1.6877457216686746

In [278]:
# Function to calculate coefficient of variation (CV)
def calculate_cv(data):
    mean = np.mean(data)
    std_dev = np.std(data)
    cv = std_dev / mean
    return cv

# Function to simulate behavior of CV for different threshold values
def simulate_cv_threshold(data, threshold_values):
    cv_values = []
    for threshold in threshold_values:
        # Filter dry periods based on threshold
        dry_periods = [duration for duration in data if duration >= threshold]
        # Calculate CV for filtered dry periods
        cv = calculate_cv(dry_periods)
        cv_values.append(cv)
    return cv_values

# Example data (dry period durations)
dry_periods_data = [3, 5, 4, 7, 2, 6, 8, 9, 10, 11, 15,11, 9, 8, 7, 2, 2, 3, 4]
# dry_periods_data = inter_event_times
dry_periods_data = exponential_data

# Example threshold values to iterate over
threshold_values = range(1, max(dry_periods_data))

# Simulate behavior of CV for different threshold values
cv_values = simulate_cv_threshold(dry_periods_data, threshold_values)

# # Find the index of the minimum CV value
optimal_threshold_index = np.argmin(cv_values)
optimal_threshold = threshold_values[optimal_threshold_index]
optimal_cv = cv_values[optimal_threshold_index]

print("Optimal Threshold Value:", optimal_threshold)
print("Optimal Coefficient of Variation (CV):", optimal_cv)


Optimal Threshold Value: 9
Optimal Coefficient of Variation (CV): 0.0


### Test different tbo values and check whether they resul in exponentially distributed inter-event times

In [163]:
threshold_values = range(0,10)
cv_values = simulate_cv_threshold(inter_event_times, threshold_values)

# # Find the index of the minimum CV value
optimal_threshold_index = np.argmin(cv_values)
optimal_threshold = threshold_values[optimal_threshold_index]
optimal_cv = cv_values[optimal_threshold_index]

print("Optimal Threshold Value:", optimal_threshold)
print("Optimal Coefficient of Variation (CV):", optimal_cv)

Optimal Threshold Value: 9
Optimal Coefficient of Variation (CV): 1.3511291665757628


### New CV method

In [165]:
# Calculate the mean (first moment)
mean = np.mean(inter_event_times)

# Calculate the variance (second moment)
variance = np.var(inter_event_times)

print("Mean (First Moment):", mean)
print("Variance (Second Moment):", variance)

Mean (First Moment): 60.81581798483207
Variance (Second Moment): 9157.20334675777


In [307]:
hist, _ = np.histogram(inter_event_times, bins=np.arange(min(inter_event_times), max(inter_event_times) + 2))
hist[0]

678

In [234]:
def calculate_cv(inter_event_times, tb0):
    # Calculate the histogram with a bin width of 1
    hist, _ = np.histogram(inter_event_times, bins=np.arange(min(inter_event_times), max(inter_event_times) + 2))
    
    # Calculate N1
    # N1: The variable N1 represents the effective number of inter-event times considered in the calculation. 
    # It starts with the total number of inter-event times and decreases as the algorithm progresses through 
    # different values of tb0. Specifically, it represents the total number of inter-event times excluding the dry 
    # periods of length tb0.
    N1 = len(inter_event_times) - hist[0]
    
    # Calculate S1 and S2 for the initial tb0 (0)
    S1 = np.sum(np.cumsum(hist) * np.arange(len(hist)))
    S2 = np.sum(np.cumsum(hist) * np.arange(len(hist)) ** 2)
    
    # Calculate CV for tb0 = 0
    CV = np.sqrt((S2 - (S1 ** 2) / N1) / (N1 / (N1 - 1)) ) / (S1 / N1)
    
    # Iterate for other values of tb0
    for k in range(5, tb0):
        print(f"tbo is {k}")
        # Update N1
        N1 = N1 - hist[k]
        
        # Update S1 and S2
        S1 = (N1 - 1) * S1 / (N1 - hist[k])
        print(f"S1 is {S1}")
        
        S2 = (N1 - 1) * S2 / (N1 - hist[k]) - hist[k] * k ** 2
        print(f"S2 is {S2}")
        # Calculate CV for current tb0
        print((S2 - (S1 ** 2) / N1) / (N1 / (N1 - 1)))
        current_CV = np.sqrt((S2 - (S1 ** 2) / N1) / (N1 / (N1 - 1))) / (S1 / N1)
        
        # Check if CV has stabilized
        if np.isclose(CV, current_CV):
            return k
        
        CV = current_CV
    
    return tb0

# Example value for tb0
tb0 = 100

# Calculate the optimal value for tb0
optimal_tb0 = calculate_cv(inter_event_times, tb0)
print("Optimal tb0:", optimal_tb0)

tbo is 5
S1 is 1355371151.954852
S2 is 910464540375.1635
-902695745768134.9
tbo is 6
S1 is 1413354944.551851
S2 is 949414892127.6196
-1022397216459200.9
tbo is 7
S1 is 1458076486.71443
S2 is 979456386096.086
-1122098952053784.9
tbo is 8
S1 is 1504000155.5873253
S2 is 1010305402482.6084
-1231024992575733.8
tbo is 9
S1 is 1544648808.4410367
S2 is 1037610950013.0573
-1333380863577634.0
tbo is 10
S1 is 1581836477.9337451
S2 is 1062591596987.7697
-1432045589609412.2
tbo is 11
S1 is 1618888503.543004
S2 is 1087481125050.1859
-1535137708489499.0
tbo is 12
S1 is 1656699052.4886918
S2 is 1112880186890.7974
-1645348952566286.8
tbo is 13
S1 is 1698534887.147497
S2 is 1140983214984.2922
-1773164559214460.5
tbo is 14
S1 is 1736814810.553912
S2 is 1166697575297.5454
-1896019251412258.0
tbo is 15
S1 is 1763899641.907579
S2 is 1184891683907.205
-1986892974391908.0
tbo is 16
S1 is 1790660652.305913
S2 is 1202868266635.1482
-2079566387631929.5
tbo is 17
S1 is 1823240057.7050097
S2 is 1224753328084.3538




In [187]:
tbo = 1
filtered_inter_event_times = [time for time in inter_event_times if time >= tbo]
len(filtered_inter_event_times)

923

## New method of finding inter-event times

In [217]:
def calculate_cv(inter_event_times):
    mean = np.mean(inter_event_times)
    variance = np.var(inter_event_times)
    std_dev = np.sqrt(variance)
    cv = std_dev / mean
    return cv

def iterate_over_tbo(inter_event_times, tbo_values):
    cv_values = []
    for tbo in tbo_values:
        # Filter inter-event times based on tbo
        filtered_inter_event_times = [time for time in inter_event_times if time >= tbo]
        cv = calculate_cv(filtered_inter_event_times)
        cv_values.append(cv)
    return cv_values

# Example usage
tbo_values = [1,2,3,4,5,6,7,8,9, 10, 11,12,13,14, 15,16,17,18,19, 20]  # Example tbo values
cv_values = iterate_over_tbo(inter_event_times, tbo_values)
print("Coefficient of Variation for different tbo values:", cv_values)

Coefficient of Variation for different tbo values: [2.2165870425519243, 2.2165870425519243, 2.010725270546958, 1.8633377856224445, 1.749506661474289, 1.6654139708331237, 1.6023595452051256, 1.5379481231353667, 1.4866581906549154, 1.4485338039575617, 1.4111302547429452, 1.3791496378526726, 1.3508393853796115, 1.323576265168709, 1.2966370068992545, 1.2677844072095832, 1.241913259558922, 1.2237295705912454, 1.2061898508601132, 1.1855166704781845]


In [252]:
import numpy as np

def calculate_cv(inter_event_times, tb0):
    # Calculate the histogram with a bin width of 1
    hist, _ = np.histogram(inter_event_times, bins=np.arange(min(inter_event_times), max(inter_event_times) + 2))
    
    # Calculate N1
    N1 = len(inter_event_times) - hist[0]
    
    # Calculate S1 and S2 for the initial tb0 (0)
    S1 = np.sum(np.cumsum(hist) * np.arange(len(hist)))
    S2 = np.sum(np.cumsum(hist) * np.arange(len(hist)) ** 2)
    
    # Calculate CV for tb0 = 0
    CV = np.sqrt((S2 - (S1 ** 2) / N1) / (N1 / (N1 - 1)) ) / (S1 / N1)
    
    # Iterate for other values of tb0
    for k in range(1, tb0):
        # Update N1
        N1 = N1 - hist[k]
        
        # Update S1 and S2
        S1 = (N1 - 1) * S1 / (N1 - hist[k])
        S2 = (N1 - 1) * S2 / (N1 - hist[k]) - hist[k] * k ** 2
        
        # Calculate CV for current tb0
        current_CV = np.sqrt((S2 - (S1 ** 2) / N1) / (N1 / (N1 - 1))) / (S1 / N1)
        
        
        if np.isclose(CV, current_CV):
            print("cv stablised")
        else:
            print("it isnt stabilising")
        
        # Check if CV has stabilized
        if np.isclose(CV, current_CV):
            return k
        
        CV = current_CV
    
    return tb0

# Example inter-event times
# Example value for tb0
tb0 = 10

# Calculate the optimal value for tb0
optimal_tb0 = calculate_cv(inter_event_times, tb0)
print("Optimal tb0:", optimal_tb0)


it isnt stabilising
it isnt stabilising
it isnt stabilising
it isnt stabilising
it isnt stabilising
it isnt stabilising
it isnt stabilising
it isnt stabilising
it isnt stabilising
Optimal tb0: 10


  from ipykernel import kernelapp as app


In [259]:
# Calculate the histogram with a bin width of 1
hist, _ = np.histogram(inter_event_times, bins=np.arange(min(inter_event_times), max(inter_event_times) + 2))

# Calculate N1
N1 = len(inter_event_times) - hist[0]
hist[0]

425

In [None]:
import numpy as np

def calculate_cv(inter_event_times, tb0):
    # Calculate the histogram with a bin width of 1
    hist, _ = np.histogram(inter_event_times, bins=np.arange(min(inter_event_times), max(inter_event_times) + 2))
    
    # Calculate N1
    N1 = len(inter_event_times) - hist[0]
    
    # Calculate S1 and S2 for the initial tb0 (0)
    S1 = np.sum(np.cumsum(hist) * np.arange(len(hist)))
    S2 = np.sum(np.cumsum(hist) * np.arange(len(hist)) ** 2)
    
    # Calculate CV for tb0 = 0
    CV = np.sqrt((S2 - (S1 ** 2) / N1) / (N1 / (N1 - 1)) ) / (S1 / N1)
    
    # Iterate for other values of tb0
    for k in range(1, tb0):
        # Update N1
        N1 = N1 - hist[k]
        
        # Update S1 and S2
        S1 = (N1 - 1) * S1 / (N1 - hist[k])
        S2 = (N1 - 1) * S2 / (N1 - hist[k]) - hist[k] * k ** 2
        
        # Calculate CV for current tb0
        current_CV = np.sqrt((S2 - (S1 ** 2) / N1) / (N1 / (N1 - 1))) / (S1 / N1)
        
        
        if np.isclose(CV, current_CV):
            print("cv stablised")
        else:
            print("it isnt stabilising")
        
        # Check if CV has stabilized
        if np.isclose(CV, current_CV):
            return k
        
        CV = current_CV
    
    return tb0

# Example inter-event times
# Example value for tb0
tb0 = 10

# Calculate the optimal value for tb0
optimal_tb0 = calculate_cv(inter_event_times, tb0)
print("Optimal tb0:", optimal_tb0)
