In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.interpolate import interp1d

from plotting_functions import *

In [88]:
class precip_time_series:
    def __init__(self, data_path):

        self.data,self.statid = self.read_raw_data_as_pandas_df(data_path)
        
        self.padded = False

        self.events = None
        
        self.dimensionless_curves = None
        
        self.interpolated_events = None
        
    def read_raw_data_as_pandas_df(self, raw_data_file_path):

        # Read file with timestamp as index
        precip = pd.read_csv(raw_data_file_path, encoding="ISO-8859-1",index_col=1)

        # Timestamps str -> datetime
        precip.index = pd.to_datetime(precip.index)

        # Save ID of station
        station_id = str(precip.station.iloc[0])

        # Remove column with station ID
        precip = precip.drop("station",axis=1) 

        return precip,station_id
    
    def pad_and_resample(self,freq = '5min',pad_value = 0):
        # Resample the data to the specified frequency and pad missing values with pad_value
        self.data = self.data.resample(freq).sum().fillna(pad_value)
        self.padded = True

    def get_events(self,threshold='11h',min_duration = 30, min_precip = 1):
        
        if not self.padded:
            self.pad_and_resample()

        self.init_events(threshold)
        self.filter_events_by_length(min_duration)
        self.filter_events_by_amount(min_precip)

    def init_events(self,threshold):
        
        precip = self.data
        
        # Size of timesteps
        time_delta = precip.index[1]-precip.index[0]

        # Rolling 11 hour sum
        precip_sum = precip.rolling(threshold).sum()

        # dates with no precip last 11 hours
        dates_w_zero_sum = precip_sum.index[(precip_sum.mask(precip_sum!=0)==precip_sum).values[:,0]]

        # Add first date with rain
        for date in precip.index:
            if precip.loc[date].values[0] != 0:
                start_dates = [date]
                break

        # Save start and end dates
        end_dates   = []
        for date in tqdm(dates_w_zero_sum):
            if precip_sum.loc[date- time_delta].values[0]!=0:
                end_dates += [date- pd.to_timedelta(threshold)]
            if precip_sum.loc[date+ time_delta].values[0]!=0:
                start_dates += [date+ time_delta]
        
        # Add end to last event
        for date in reversed(precip.index):  # Iterate from last to first
            if precip.loc[date].values[0] != 0:  # Check if value is not zero
                end_dates += [date]
                break  # Stop at the first nonzero value
        
        # Save events as list of tuples
        events = []
        for i in range(len(end_dates)):
            events+=[(start_dates[i],end_dates[i])]

        # update events
        self.events = events


    def filter_events_by_length(self,min_duration):
        
        # Remove events with duration under min duration
        filtered_events = [event for event in self.events if event[1]-event[0]>=pd.Timedelta(minutes=min_duration)]

        # Update events
        self.events = filtered_events      
        
    def filter_events_by_amount(self,min_precip):
        
        # Remove events with total precip under minimum
        filtered_events = [event for event in self.events if self.data.loc[event[0]:event[1]].sum().values[0]>=min_precip]
        
        # update events
        self.events = filtered_events       
        
    def create_dimensionless_events(self):

        # Make sure events have been computed
        if self.events == None:
            self.get_events()
        
        # Make list of nparrays containing the values of the dimensionless curve
        dimensionless_curves = [self.get_dimensionless_curve(self.data.loc[event[0]:event[1]].values) for event in self.events]

        # Assign to global value
        self.dimensionless_curves = dimensionless_curves
 
    def create_interpolated_events(self, n):
        # Make sure events have been computed
        if self.dimensionless_curves == None:
            self.dimensionless_curves()
        
        # Make list of nparrays containing the values of the dimensionless curve
        interpolated_events = [self.get_interpolated_event(event, n) for event in self.dimensionless_curves]

        # Assign to global value
        self.interpolated_events = interpolated_events


    def get_dimensionless_curve(self,series):
    
        # Calculate cumulative rainfall
        cumulative_rainfall = np.cumsum(series)
        cumulative_rainfall = np.append([0],cumulative_rainfall)

        # normalize
        normalized_cumulative_rainfall = cumulative_rainfall/cumulative_rainfall[-1]

        return normalized_cumulative_rainfall

    
    def get_interpolated_event(self, series, n):
        # Calculate cumulative rainfall
        normalized_cumulative_rainfall = series

        # Define target points for bin_number bins
        target_points = np.linspace(0, 1, n+1)

        # Create interpolation function based on existing data points
        rainfall_times = np.array(range(0, len(normalized_cumulative_rainfall)))

        # Normalize time from 0 to 1
        normalized_time = (rainfall_times - rainfall_times[0]) / (rainfall_times[-1] - rainfall_times[0])
        interpolation_func = interp1d(normalized_time, normalized_cumulative_rainfall, kind='linear', fill_value="extrapolate")

        # Interpolate values at target points
        interpolated_values = interpolation_func(target_points)
    
        return interpolated_values    

        
    def plot_specific_dimensionless_curve(self,event_idx, plot_boundaries=False):
        plt.figure()

        x_values = np.linspace(0,1,len(self.dimensionless_curves[event_idx]))

        # Define time intervals
        total_duration = x_values[-1]  # e.g., 270 minutes
        boundaries = np.linspace(0, total_duration, 6)  # 6 boundaries -> 5 segments

        plt.plot(x_values,self.dimensionless_curves[event_idx], label = f"Event: {event_idx+1}")
        plt.scatter(x_values,self.dimensionless_curves[event_idx], label = f"Event: {event_idx+1}")
        for marker in boundaries:
                plt.axvline(marker, color='red', linestyle='--', label='Fifth Boundary' if marker==boundaries[1] else "")
        plt.legend()
        plt.title("dimensionless curves")      
        
#     def return_specific_event(self,event_idx):

#         # Size of timesteps
#         time_delta = self.data.index[1] - self.data.index[0]
#         time_delta_minutes = time_delta.seconds / 60

#         # Extract event data
#         event = self.data.loc[self.events[event_idx][0]:self.events[event_idx][1]]

#         return event            
    
#     def return_specific_dimensionless_event(self,event_idx):

#         # Size of timesteps
#         time_delta = self.data.index[1] - self.data.index[0]
#         time_delta_minutes = time_delta.seconds / 60
        
#         # Extract event data
#         event = self.dimensionless_curves[event_idx]

#         return event       
    
#     def return_specific_interpolated_event(self,event_idx):
        
#         # Extract event data
#         event = self.interpolated_events[event_idx]

#         return event   
    

# =========================================================
# Funtions
# =========================================================
class rainfall_analysis:
    def __init__(self, ts: precip_time_series):
        self.ts = ts
        self.metrics = {} 
        
        # Prepere ts for analysis
        if not self.ts.padded:
            ts.pad_and_resample()

        if ts.events == None:
            ts.get_events()
                    
        if ts.dimensionless_curves == None:
            ts.create_dimensionless_curves()
            
#         if ts.interpolated_events == None:
#             ts.create_interpolated_events(5)

#     def huff_quantile(self,ts):
#         # ts is the cumulative rainfall  
#         # Convert back to incrementral
#         raw_rainfall = np.diff(ts)
#         # Find the part with the most rainfall
#         quantile = np.argmax(raw_rainfall)+1

        #return quantile

    def get_interpolated_event(self, series, n):
        # Calculate cumulative rainfall
        normalized_cumulative_rainfall = series

        # Define target points for bin_number bins
        target_points = np.linspace(0, 1, n+1)

        # Create interpolation function based on existing data points
        rainfall_times = np.array(range(0, len(normalized_cumulative_rainfall)))

        # Normalize time from 0 to 1
        normalized_time = (rainfall_times - rainfall_times[0]) / (rainfall_times[-1] - rainfall_times[0])
        interpolation_func = interp1d(normalized_time, normalized_cumulative_rainfall, kind='linear', fill_value="extrapolate")

        # Interpolate values at target points
        interpolated_values = interpolation_func(target_points)
    
        return interpolated_values        
    

    def huff_quantile(self,ts):
        interpolated = self.get_interpolated_event(ts,4)
        quantile = np.argmax(interpolated)+1

        return quantile    
    
    def get_metrics(self):

        padded_precip = self.ts.data
        events_list = self.ts.events
                
        dimensionless_cumulative_events  = self.ts.dimensionless_curves
        interpolated_events  = self.ts.interpolated_events
        
        # resolution    
        time_delta = padded_precip.index[1]-padded_precip.index[0]
        time_delta_minuts = time_delta.seconds/60

        #####################################
        # Rainfall metrics
        #####################################

        # huff quantiles
        self.metrics["huff_quantile"] = np.array([[self.huff_quantile(event) for event in dimensionless_cumulative_events]])
        
#         self.metrics["huff_quantile"] = np.array([[self.huff_quantile(padded_precip[event[0]:event[1]])] for event in events_list])

        

In [89]:
# Analysis
analysis = rainfall_analysis(ts)
analysis.get_metrics()

In [90]:
# Path to data file
raw_data_file =  "/nfs/a319/gy17m2a/PhD/datadir/DanishRainData/Sample1.csv"

# Load the full time series from one rain gauge
ts = precip_time_series(raw_data_file)
 
# Pad and resample to 5minutes
ts.pad_and_resample('5min')

# Split the time series into events
ts.get_events()

# Get dimensionless versions of events
ts.create_dimensionless_events()

# # Get interpolated versions of events
# ts.create_interpolated_events(n=5)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25604/25604 [00:03<00:00, 6875.41it/s]


In [15]:
## Get examples for one event
event_idx  = 1
event = ts.return_specific_event(event_idx)
dimensionless_cumulative_event =  ts.return_specific_dimensionless_event(event_idx)
interpolated_dimensionless_cumulative_event =  ts.return_specific_interpolated_event(event_idx)

## could also just do
ts.interpolated_events[event_idx]

array([0.        , 0.00487805, 0.05853659, 0.18243902, 0.8195122 ,
       1.        ])

In [18]:
# fig, axs = plt.subplots(4, 1, figsize=(5, 12))  
# plt.subplots_adjust(hspace=0.5)

# # # Generate the various processed data.
# # cumulative_rainfall, cumulative_rainfall_times = create_cumulative_event(sample1)
# # dimensionless_cumulative_rainfall, dimensionless_times = create_dimensionless_event(cumulative_rainfall, cumulative_rainfall_times)
# # interpolated_n_cumulative_rainfall, interpolated_n_times = interpolate_rainfall(dimensionless_cumulative_rainfall, n)
# # interpolated_n_incremental_rainfall = create_incremental_event(interpolated_n_cumulative_rainfall)

# # Plot each element in its own axis.
# plot_raw_data(event['Nedbør (mm)'].values, axs[0])
# plot_dimensionless_cumulative(event['Nedbør (mm)'].values, dimensionless_cumulative_event, axs[1], False)
# plot_dimensionless_cumulative(event['Nedbør (mm)'].values, dimensionless_cumulative_event, axs[2], boundaries_boolean =True)
# plot_interpolated_cumulative(interpolated_dimensionless_cumulative_event, axs[3])
# # plot_incremental_rainfall(interpolated_n_incremental_rainfall, axs[4], labels=labels)

# # Adjust tick parameters for all subplots.
# for ax in axs:
#     ax.tick_params(axis='both', labelsize=14)