In [None]:
# Calculate Daily Flood Index (adapted from repository flood_monitoring_dss by Mohammed Moishin)
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import time
from datetime import datetime
import math
import statistics
import base64
from tqdm.notebook import tqdm

def calculate_fi(uploaded_file):
    """Compute the daily flood index (FI) for one rainfall record.

    Steps performed on a copy of the input (the caller's frame is not
    modified):

    1. derive ``year``/``month``/``day`` columns from ``date``;
    2. fold every 29 February into the following row and drop it, so each
       year has at most 365 rows;
    3. fill missing ``daily_rain`` values with the mean of the valid values
       found at the same position within the year across all years
       (``0.0`` when no year has a valid value at that position);
    4. compute ``ep`` with ``effective_precipitation`` over the trailing
       365-row window, ``awri = ep / sum(1/k, k=1..365)`` and
       ``fi = (ep - mean(annual max ep)) / stdev(annual max ep)``.

    The first 365 rows keep ``awri`` and ``fi`` at 0, and ``ep`` stays 0 for
    the first 366 rows, because no full antecedent window exists there.

    Args:
        uploaded_file: DataFrame with a ``date`` column (anything
            ``pd.to_datetime`` accepts) and a numeric ``daily_rain`` column.
            Assumed to hold one row per day in chronological order; this is
            not verified here.

    Returns:
        The processed DataFrame with the added columns ``year``, ``month``,
        ``day``, ``ep``, ``awri`` and ``fi``; ``None`` when the input is
        ``None`` or the record is too short.
    """
    if uploaded_file is None:
        return None

    days_in_year = 365
    min_total_years = 50
    antecedent_period = 365

    # Normalising constant for AWRI: sum of 1/k for k = 1..antecedent_period.
    weight = 0
    for k in range(1, antecedent_period + 1):
        weight = weight + 1 / k

    df = uploaded_file.reset_index(drop=True)
    df["date"] = pd.to_datetime(df["date"])

    print("Deriving Day, Month and Year")
    df["year"] = df["date"].dt.year
    df["month"] = df["date"].dt.month
    df["day"] = df["date"].dt.day

    print("Making Adjustments for Leap Year")
    leap_mask = ((df["month"] == 2) & (df["day"] == 29)).to_numpy()
    leap_rows = np.flatnonzero(leap_mask)
    # A leap day sitting on the very last row has no following row to receive
    # its rainfall; it is simply dropped instead of indexing out of range.
    carry_rows = leap_rows[leap_rows + 1 < len(df.index)]
    if carry_rows.size:
        # The index was reset above, so positions and labels coincide.
        df.loc[carry_rows + 1, "daily_rain"] += (
            df.loc[carry_rows, "daily_rain"].to_numpy()
        )
    df = df.loc[~leap_mask].reset_index(drop=True)

    # NOTE(review): the "+ 2" is padding inherited from an earlier
    # array-sizing step; it is kept so the printed value and the acceptance
    # threshold stay exactly as before.
    total_years = math.ceil(len(df.index) / days_in_year) + 2
    print("Total number of years: " + str(total_years))
    if total_years < min_total_years:
        print("At least 50 years of data is needed.")
        return None

    # Consecutive rows sharing a calendar year form one "year run"; the
    # position of a row inside its run is its column in the year/day table.
    year_run = df["year"].ne(df["year"].shift()).cumsum()
    day_position = df.groupby(year_run).cumcount()

    # Fill gaps with the across-year mean at the same position. Only real
    # observations enter the mean (no padding rows, no unfilled cells).
    rain = df["daily_rain"].astype(float)
    position_mean = rain.groupby(day_position).transform("mean")
    df["daily_rain"] = rain.fillna(position_mean).fillna(0.0)

    print("Calculating Effective Precipitation")
    n_rows = len(df.index)
    ep = np.zeros(n_rows)
    with tqdm(total=n_rows) as pbar:
        for i in range(n_rows):
            if i > antecedent_period:
                # .loc slicing is inclusive: exactly 365 rows ending at i.
                ep[i] = effective_precipitation(
                    df.loc[i - antecedent_period + 1:i]
                )
            pbar.update(1)
    df["ep"] = ep

    print("Calculating Available Water Resource Index")
    awri = np.zeros(n_rows)
    awri[antecedent_period:] = ep[antecedent_period:] / weight
    df["awri"] = awri

    print("Calculating Flood Index")
    # Annual maxima of EP, taken only over rows past the warm-up period and
    # only for years that actually occur (no zero padding in the statistics).
    tail_ep = pd.Series(ep[antecedent_period:])
    tail_runs = year_run.to_numpy()[antecedent_period:]
    annual_max = tail_ep.groupby(tail_runs).max().to_numpy()
    mean_annual_max = np.mean(annual_max)
    std_annual_max = statistics.stdev(annual_max.tolist())

    fi = np.zeros(n_rows)
    fi[antecedent_period:] = (
        (ep[antecedent_period:] - mean_annual_max) / std_annual_max
    )
    df["fi"] = fi

    print("Flood Index Successfully Calculated")
    return df

In [None]:
# Read all precipitation 'all_rain' and observation stations data 'df_stations'
# example from notebook file 'Precipitation_download'

In [None]:
# Notebook display cell: show the loaded precipitation table for inspection.
all_rain

In [None]:
# Build the daily rainfall frame for station 98210, keeping only rows dated
# from 2010-01-01 (inclusive) up to 2022-01-01 (exclusive).
df = pd.DataFrame(columns=['date', 'daily_rain'])
df['date'] = all_rain.loc[all_rain['station_id'] == 98210, 'ref']
df['daily_rain'] = all_rain.loc[all_rain['station_id'] == 98210, 'value']
df = df[(df['date'] >= '2010-01-01') & (df['date'] < '2022-01-01')]

In [None]:
def effective_precipitation0(df):
    """Reference (pure-loop) effective precipitation over the last 365 rows.

    The rainfall ``lag`` rows before the final row is multiplied by
    ``sum(1/i for i in lag+1 .. 365)`` and the 365 products are summed.

    Args:
        df: frame with a ``daily_rain`` column holding at least 365 rows;
            only the last 365 are read.

    Returns:
        The weighted rainfall sum as a NumPy float.
    """
    N = 365  # window length in rows
    rain = df["daily_rain"]
    weighted = np.zeros(N)
    for lag in range(N):
        # Harmonic tail 1/(lag+1) + ... + 1/N, accumulated left to right.
        tail = 0.0
        divisor = lag + 1
        while divisor <= N:
            tail = tail + 1 / divisor
            divisor += 1
        # lag 0 is the most recent row, lag N-1 the oldest one in the window.
        weighted[lag] = rain.iloc[-(lag + 1)] * tail
    return np.sum(weighted)

In [None]:
def effective_precipitation(df, n_days=365):
    """Return the effective precipitation of the last ``n_days`` rows of *df*.

    The rainfall ``n`` rows before the final row (``n = 0`` is the final row
    itself) is weighted by ``sum(1/i for i in n+1 .. n_days)``, so recent
    days count most; the weighted values are summed.

    Args:
        df: frame with a numeric ``daily_rain`` column; only its last
            ``n_days`` rows are used.
        n_days: length of the antecedent window in rows (default 365).

    Returns:
        The weighted rainfall sum as a NumPy float (NaN if the window
        contains NaN).

    Raises:
        IndexError: if *df* has fewer than ``n_days`` rows.
    """
    rain = df["daily_rain"].to_numpy(dtype=float)
    if rain.size < n_days:
        raise IndexError(
            f"effective_precipitation needs at least {n_days} rows, "
            f"got {rain.size}"
        )

    # weights[n] = 1/(n+1) + 1/(n+2) + ... + 1/n_days, built in one pass as a
    # reversed cumulative sum instead of one partial sum per lag.
    reciprocals = 1.0 / np.arange(1, n_days + 1)
    weights = np.cumsum(reciprocals[::-1])[::-1]

    # Most recent row first, so element n lines up with weights[n].
    recent_first = rain[rain.size - n_days:][::-1]
    return np.dot(recent_first, weights)


In [None]:
import os
# Calculate daily flood index at all stations (with more than 50 years of data) and save
df_stations_filter = df_stations[df_stations['duration'] >= 50]
sids = df_stations_filter['id'].unique()
start_date = '1920-01-01'
end_date = '2022-01-01'

# Make sure the output folder exists; otherwise every to_csv call fails and
# the station is reported as skipped after the whole computation has run.
os.makedirs('fidx', exist_ok=True)

# Station ids that have rainfall data, computed once instead of per station.
stations_with_rain = set(all_rain['station_id'].unique())

for sid in sids:
    print(sid)
    file_path = os.path.join('fidx', str(sid) + '.csv')
    if os.path.exists(file_path):
        print("File already exists")
        continue
    if sid not in stations_with_rain:
        continue
    try:
        # Read precipitation data at station
        station_rain = all_rain[all_rain['station_id'] == sid]
        df = pd.DataFrame(columns=['date', 'daily_rain'])
        df['date'] = station_rain['ref']
        df['daily_rain'] = station_rain['value']
        df = df[(df['date'] >= start_date) & (df['date'] < end_date)]
        df_results = calculate_fi(df)
        # save file in fidx folder, dropping the warm-up rows where the
        # indices are still zero
        if df_results is not None:
            df_results.loc[366:].to_csv(file_path, index=False)
    except Exception as exc:
        # Best effort: keep going with the next station, but say why this one
        # failed. KeyboardInterrupt/SystemExit are no longer swallowed.
        print(f'Skipped station ({type(exc).__name__}: {exc})')

In [None]:
def check_dates(df):
    """Return *df* with one row for every calendar day in its date span.

    The ``date`` column is converted with ``pd.to_datetime``. Every day
    between the earliest and latest date that has no row gets a new row whose
    other columns are NaN; the result is then sorted by date and re-indexed
    from 0. When no day is missing (or the frame is empty) the rows are
    returned in their original order with only the date conversion applied.

    The caller's frame is not modified.

    Args:
        df: DataFrame with a ``date`` column.

    Returns:
        A new DataFrame as described above.
    """
    df = df.assign(date=pd.to_datetime(df["date"]))
    if df.empty:
        # min()/max() would be NaT and date_range cannot be built from NaT.
        return df

    # create a sequence of dates from the first to the last date in df
    date_range = pd.date_range(start=df["date"].min(), end=df["date"].max(), freq="D")

    # dates of the full sequence that do not occur in df
    missing_dates = date_range.difference(pd.DatetimeIndex(df["date"]))
    if len(missing_dates) == 0:
        return df

    # rows for the missing dates; concat leaves their other columns as NaN
    missing_df = pd.DataFrame({"date": missing_dates})
    return pd.concat([df, missing_df]).sort_values(by="date").reset_index(drop=True)