In [1]:
#%% LOAD PACKAGES
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os.path
from os import path
from datetime import datetime, timedelta
from scipy.optimize import least_squares
import calendar
from dateutil.relativedelta import relativedelta
# import sklearn
# from sklearn.linear_model import LinearRegression


In [2]:
#%% SR CALCULATION
# INPUT
# sd_input: dataframe with daily catchment values for p and ep, plus 'date_start' and 'date_end' columns
#           that hold the start and end date of the analysis period (depending on hydro-year)
# Si_0: initial interception storage (0 in the run cell)
# Si_max: maximum interception storage (2.5 mm in the run cell)
# q_mean: mean discharge of the catchment, used to close the water balance (Et = Pe - Q)

# OUTPUT
# sd_input: pandas dataframe limited to the analysis period, with daily values for p, ep, Pe, Ei, Et and Sd (based on initial Et estimate)

# SD
def sd_initial(sd_input, Si_0, Si_max, q_mean):
    """Compute a first-estimate daily storage-deficit (Sd) series for one catchment.

    Steps:
      1. Restrict the series to the window given by the ``date_start`` and
         ``date_end`` columns (read from the first row).
      2. Run a daily interception bucket to obtain effective precipitation Pe
         and interception evaporation Ei.
      3. Close the mean water balance (Et_mean = mean(Pe) - q_mean) and scale
         daily ``ep`` so that its mean equals Et_mean.
      4. Accumulate the deficit ``Sd`` as a running sum of Pe - Et capped at 0.

    Parameters
    ----------
    sd_input : pandas.DataFrame
        Date-indexed frame with columns ``p``, ``ep``, ``date_start`` and
        ``date_end``. The caller's frame is not modified.
    Si_0 : float
        Interception storage carried into the first time step.
    Si_max : float
        Maximum interception storage; anything above it becomes Pe.
    q_mean : float
        Mean discharge used in the water balance.

    Returns
    -------
    pandas.DataFrame
        Copy of the windowed input with the extra columns ``Si_1``, ``Pe``,
        ``Si_2``, ``Ei``, ``Si_3``, ``Et`` and ``Sd`` (in that order). ``Et``
        and ``Sd`` stay 0 when the water balance gives a negative Et_mean.
        An empty frame (with the extra columns) is returned when the window
        contains no rows.

    Raises
    ------
    IndexError
        If ``sd_input`` itself has no rows.
    """
    # The window bounds are repeated on every row; take them from the first one.
    # .iloc is required: an integer key on a date-indexed Series is not positional
    # in recent pandas versions.
    date_start = pd.Timestamp(sd_input['date_start'].iloc[0])
    date_end = pd.Timestamp(sd_input['date_end'].iloc[0])

    window = sd_input.loc[date_start:date_end]

    # Sometimes the start date is e.g. 02-01 while the time series only begins
    # on 02-28: in that case move the start one year forward.
    start_shifted = len(window) > 0 and window.index[0] > date_start
    if start_shifted:
        date_start = date_start + pd.DateOffset(years=1)

    # Work on an explicit copy so the new columns are not written onto a slice
    # of the caller's frame.
    sd_input = window.loc[date_start:date_end].copy()
    if start_shifted:
        sd_input['date_start'] = date_start

    n = len(sd_input)

    # convert to numpy arrays
    p = np.array(sd_input['p'].values)
    ep = np.array(sd_input['ep'].values)

    si1 = np.zeros(n)
    pe = np.zeros(n)
    si2 = np.zeros(n)
    ei = np.zeros(n)
    si3 = np.zeros(n)
    et = np.zeros(n)
    sd = np.zeros(n)

    # Interception storage and effective precipitation for all time steps.
    # The loop starts at index 0 so that a single-row window is handled too.
    carry = Si_0
    for t in range(n):
        si1[t] = p[t] + carry                 # storage after rainfall
        pe[t] = max(0, si1[t] - Si_max)       # overflow = effective precipitation
        si2[t] = si1[t] - pe[t]               # storage after overflow
        ei[t] = min(si2[t], ep[t])            # evaporation limited by storage
        si3[t] = si2[t] - ei[t]               # storage passed to the next day
        carry = si3[t]

    if n > 0:
        # water balance Et calculation (Et = Pe - Q)
        Pe_mean = np.mean(pe)
        EP_mean = np.mean(ep)
        Et_mean = Pe_mean - q_mean

        # Pe < Q is physically impossible; Et and Sd are then left at 0.
        if not Et_mean < 0:
            for t in range(n):
                # daily Et = daily ep scaled by the ratio of mean Et to mean ep
                et[t] = ep[t] / EP_mean * Et_mean
                if t == 0:
                    # NOTE(review): the first day is not capped at 0, unlike the
                    # following days - confirm this is intended.
                    sd[t] = pe[t] - et[t]
                else:
                    sd[t] = min(0, sd[t - 1] + pe[t] - et[t])

    # Column order is kept as Si_1, Pe, Si_2, Ei, Si_3, Et, Sd.
    sd_input['Si_1'] = si1
    sd_input['Pe'] = pe
    sd_input['Si_2'] = si2
    sd_input['Ei'] = ei
    sd_input['Si_3'] = si3
    sd_input['Et'] = et
    sd_input['Sd'] = sd

    return sd_input


In [3]:
#%% function 2: Sr calculation based on return periods - INCLUDE MIN MAX APPROACH LIKE STIJN

# INPUT
# T: array of return periods of interest, e.g. T=[2,5,10,15,20,30,40]
# Sd: pandas Series of daily storage deficits indexed by date (the Sd column calculated in the sd_initial function)
# date_start, date_end: start and end 'month-day' of time-series (depending on hydro-year)
# year_start, year_end: start and end year of time-series
# it: number of iterations (0 when the initial Sd estimate is used)
    
# OUTPUT
# Sd_T: list of storage deficits corresponding with return periods T
    

def sr_return_periods_minmax_rzyear(T,Sd,it,year_start,year_end,date_start,date_end):
    """Estimate storage deficits for the return periods in ``T`` with a Gumbel fit.

    The daily deficit series is sign-flipped so that deficits are positive.
    A "root-zone year" is then defined as the 12 months starting at the
    calendar month whose summed monthly maximum deficit is smallest. For each
    root-zone year the difference between the maximum deficit and the minimum
    deficit preceding it is collected, a Gumbel distribution is fitted to these
    values by the method of moments, and the quantile for every return period
    is returned.

    Parameters
    ----------
    T : sequence of numbers
        Return periods (years).
    Sd : pandas.Series
        Daily storage deficits with a DatetimeIndex (values <= 0 for a deficit).
    it : int
        Accepted for backward compatibility; not used.
    year_start, year_end : int
        First and last calendar year of the series; ``year_end - year_start``
        root-zone years are evaluated, starting in ``year_start``.
    date_start, date_end : str
        Accepted for backward compatibility; not used (the hydro-year based
        extremes were never part of the returned result).

    Returns
    -------
    list of float
        One deficit per entry of ``T`` (empty list for empty ``T``). The values
        are NaN when no root-zone year contains data.
    """
    def gumbel_r_mom(x):
        # method-of-moments estimates of Gumbel location and scale
        scale = np.sqrt(6)/np.pi * np.std(x)
        loc = np.mean(x) - np.euler_gamma*scale
        return loc, scale

    deficit = Sd * -1
    total_years = year_end - year_start

    # define root zone year: the month with the smallest summed monthly maximum.
    # 'MS' bins by calendar month exactly like a month-end alias would, but is
    # accepted by every pandas version.
    monthly_max = deficit.groupby(pd.Grouper(freq='MS')).max()
    monthly_max_sum = monthly_max.groupby(monthly_max.index.month).sum()
    start_month = int(monthly_max_sum.idxmin())

    # The root-zone year ends with the month before its start month. A January
    # start therefore ends in December of the SAME calendar year; every other
    # start month ends in the following calendar year.
    if start_month == 1:
        end_month, end_year_shift = 12, 0
    else:
        end_month, end_year_shift = start_month - 1, 1

    # annual (max - preceding min) deficit per root-zone year
    Sd_maxmin_rz_year = []
    for year in range(year_start, year_start + total_years):
        end_year = year + end_year_shift
        # use the real end year so that 29 February is included in leap years
        end_day = calendar.monthrange(end_year, end_month)[1]
        window_start = f'{year}-{start_month:02d}-01'
        window_end = f'{end_year}-{end_month:02d}-{end_day:02d}'
        window = deficit.loc[window_start:window_end].dropna()
        if window.empty:
            # no data in this root-zone year: nothing to contribute
            continue

        peak_value = window.max()
        peak_date = window.idxmax()
        # smallest deficit between the start of the root-zone year and the peak
        preceding_min = window.loc[:peak_date].min()
        Sd_maxmin_rz_year.append(peak_value - preceding_min)

    loc1, scale1 = gumbel_r_mom(Sd_maxmin_rz_year)

    # find Sd value corresponding with each return period
    Sd_T = []
    for return_period in T:
        p = 1 - (1/return_period)           # non-exceedance probability
        y = -np.log(-np.log(p))             # Gumbel reduced variate
        Sd_T.append(scale1 * y + loc1)

    return Sd_T

In [4]:
#%% RUN SD CALCULATION

# Catchment ids to process (one id per line). atleast_1d keeps the result
# iterable when the file holds a single id.
a = np.atleast_1d(np.genfromtxt('/home/vanoorschot/work/fransje/scripts/GLOBAL_SR/catch_id_selected_lowercase_wb.txt',dtype='str'))
names = a

fol = '/home/vanoorschot/work/fransje/scripts/GLOBAL_SR'

# mean values per catchment; the 'q' column is used for the water balance
q = pd.read_csv(f'{fol}/p_q_ep_timeseries_selected_catchments/mean_q_p_ep.csv', index_col=0)

# interception parameters passed to sd_initial
Si_0 = 0
Si_max = 2.5

for name in names:
    # daily p and ep for this catchment
    catchment_ts = pd.read_csv(f'{fol}/p_ep_timeseries_selected_catchments/daily/{name}.csv',index_col=0)
    catchment_ts.index = pd.to_datetime(catchment_ts.index)

    # Monthly sums of p and ep. 'MS' bins by calendar month and is accepted by
    # every pandas version. Only months whose last day lies inside the record
    # are kept, which is the same selection a month-end date_range between the
    # first and last timestamp gives (assumes timestamps at midnight).
    monthly_sum = catchment_ts[['p','ep']].groupby(pd.Grouper(freq='MS')).sum()
    month_end = monthly_sum.index + pd.offsets.MonthEnd(0)
    covered = (month_end >= catchment_ts.index[0]) & (month_end <= catchment_ts.index[-1])
    df_monthly = monthly_sum[covered]

    q_mean = q.loc[f'{name}'].q # MAKE SURE CORRECT YEARS ARE USED FOR MEAN VALUES

    # calculate start hydroyear: the month after the one with the largest mean p - ep
    df_monthly_mean = df_monthly.groupby([df_monthly.index.month]).mean()
    wettest_month = int((df_monthly_mean.p-df_monthly_mean.ep).idxmax())
    hydro_year_start_month = wettest_month % 12 + 1  # December wraps to January

    # analysis window: first day of the hydro-year in the first calendar year
    # up to the day before the hydro-year start in the last calendar year
    start_year = int(catchment_ts.index.year[0])
    end_year = int(catchment_ts.index.year[-1])
    start_date = datetime(start_year,hydro_year_start_month,1)
    end_date = datetime(end_year,hydro_year_start_month,1) - timedelta(days=1)

    # GSWP data: sd_initial reads the window from the date_start/date_end columns
    sd_input = catchment_ts[['p','ep']].copy()
    sd_input['date_start'] = start_date
    sd_input['date_end'] = end_date

    # run initial Sd calculation and store the result per catchment
    out = sd_initial(sd_input, Si_0, Si_max, q_mean)
    out.to_csv(f'{fol}/sr_calculation/gswp_gleam/sd_catchments/'+str(name)+'.csv')
        

In [9]:
#%% RUN SR CALCULATION
# SR
def sr_calc(names,fol):
    """Compute Sr for a fixed set of return periods for every catchment.

    For each id in ``names`` the file
    ``{fol}/sr_calculation/gswp_gleam/sd_catchments/{id}.csv`` is read and its
    ``Sd`` column is passed to ``sr_return_periods_minmax_rzyear``. Catchments
    are skipped (their row stays NaN) when the file does not exist, the file
    has no rows, or the first ``Sd`` value is NaN.

    Parameters
    ----------
    names : sequence of str
        Catchment ids; they become the index of the result.
    fol : str
        Root folder that contains the ``sr_calculation/gswp_gleam`` tree.

    Returns
    -------
    pandas.DataFrame
        One row per catchment and one ``Sr_<T>`` column per return period.
        The same table is written to
        ``{fol}/sr_calculation/gswp_gleam/sr_df.csv``.
    """
    # return periods; the column names are derived from them so both stay in sync
    T = [2,3,5,10,20,30,40,50,60]
    sr_df = pd.DataFrame(index=names, columns=[f'Sr_{t}' for t in T])

    for name in names:
        sd_file = f'{fol}/sr_calculation/gswp_gleam/sd_catchments/'+str(name)+'.csv'
        if not path.exists(sd_file):
            continue

        out = pd.read_csv(sd_file,index_col=0)
        out.index = pd.to_datetime(out.index)
        if out.empty:
            continue

        Sd = out['Sd']
        # .iloc: an integer key on a date-indexed Series is not positional in
        # recent pandas versions
        if np.isnan(Sd.iloc[0]):
            continue

        # run SR calculation based on initial Sd calculation (without iterations)
        it = 0
        first_day = out.index[0]
        last_day = out.index[-1]
        year_start = first_day.year
        year_end = last_day.year
        date_start = str(first_day.month)+'-'+str(first_day.day)
        date_end = str(last_day.month)+'-'+str(last_day.day)
        if date_end == '2-29':
            # 29 February does not exist in other years; fall back to the 28th
            date_end = '2-28'

        sr_T = sr_return_periods_minmax_rzyear(T,Sd,it,year_start,year_end,date_start,date_end)
        sr_df.loc[name,:] = sr_T

    sr_df.to_csv(f'{fol}/sr_calculation/gswp_gleam/sr_df.csv')

    return sr_df

In [10]:
# Root folder of the GLOBAL_SR project (input time series and results live below it).
fol = '/home/vanoorschot/work/fransje/scripts/GLOBAL_SR'

# Ids of the selected catchments, read as strings.
a = np.genfromtxt(
    '/home/vanoorschot/work/fransje/scripts/GLOBAL_SR/catch_id_selected_lowercase_wb.txt',
    dtype='str',
)
names = a

# Compute Sr per catchment; sr_calc also writes the table to sr_df.csv.
sr = sr_calc(names=names, fol=fol)


In [12]:
# Display the Sr table returned by sr_calc (one row per catchment, one column per return period).
sr

Unnamed: 0,Sr_2,Sr_3,Sr_5,Sr_10,Sr_20,Sr_30,Sr_40,Sr_50,Sr_60
br_0000078,110.54926,122.900463,136.657039,153.94266,170.523442,180.061958,186.786905,191.985565,196.224214
br_0003083,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
fr_0001029,164.265834,186.934675,212.182874,243.908125,274.339747,291.846308,304.188975,313.73036,321.509786
id_0000013,57.644383,76.158138,96.778466,122.688634,147.542289,161.839984,171.9203,179.712796,186.066292
za_0000059,315.733302,337.360425,361.448375,391.715732,420.748907,437.450977,449.226452,458.329374,465.751306
...,...,...,...,...,...,...,...,...,...
312061,133.803468,152.308611,172.919349,198.817466,223.659562,237.950606,248.026235,255.815106,262.165647
314207,212.486429,237.086591,264.485866,298.914018,331.938328,350.936398,364.330625,374.684909,383.12712
314213,373.733461,405.356027,440.576744,484.832814,527.284299,551.705591,568.923357,582.233392,593.085531
315450,212.716749,239.877664,270.129064,308.141012,344.602985,365.578658,380.367156,391.799269,401.120271
