In [None]:
# This notebook shows how false and true positive rates were calculated, along with other
# binary classification metrics

In [None]:
import pandas as pd
import numpy as np
# Merge all extracted data from hazard and normal conditions.
# `DataFrame.append` was removed in pandas 2.0, so the three sources are stacked
# with a single `pd.concat`; like `append`, it keeps each frame's own row labels.
df_results = pd.read_csv('extracted_event_short_allvar.csv')
df_results2 = pd.read_csv('extracted_event_flood_allvar.csv')  # long list of floods, including from snow melt and river
df_results3 = pd.read_csv('extracted_event_random_allvar.csv')  # random normal conditions
df_results = pd.concat([
    df_results[df_results['Hazard'] != 'Flood'],  # flood events come from the longer flood list instead
    df_results2,
    df_results3,
])

# Remove invalid data: these indicators are only kept when strictly positive.
# Filtering the rows directly (rather than blanking them to NaN first) leaves the
# dtypes of the remaining columns untouched.
positive_only_indicators = [
    'Daily Discharge',
    'Daily Precipitation',
    '3-Month Precipitation',
    '6-Month Precipitation',
]
invalid_rows = (df_results['Value'] <= 0) & df_results['Indicator'].isin(positive_only_indicators)
# dropna() additionally discards every row that has a missing value in any column.
df_results = df_results[~invalid_rows].dropna()

In [None]:
# Distinct hazard labels present in the merged data.
df_results['Hazard'].unique()

In [None]:
# Distinct event identifiers present in the merged data.
df_results['Event'].unique()

In [None]:
# Calculate true and false positive rates
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    if denominator == 0:
        return np.nan
    return numerator / denominator


def performance_rates(df_results, thresholds_df):
    """Score threshold-based hazard detection against the documented hazards.

    Parameters
    ----------
    df_results : pandas.DataFrame
        Observations with at least the columns ``Indicator``, ``Hazard`` and
        ``Value``. The frame is copied, never modified.
    thresholds_df : pandas.DataFrame
        Detection rules with the columns ``Indicator``, ``Threshold``,
        ``Hazard`` and ``Type``. ``Type == 'Upper'`` flags the hazard when
        ``Value <= Threshold``; ``Type == 'Lower'`` flags it when
        ``Value >= Threshold``; any other type never flags.

    Returns
    -------
    result : pandas.DataFrame
        One row per row of ``thresholds_df`` with the confusion-matrix counts
        and F-Score / Precision / Recall / Accuracy. Only observations whose
        actual hazard is the rule's hazard or ``'None'`` are counted. A metric
        whose denominator is zero is NaN.
    merged_df : pandas.DataFrame
        Copy of ``df_results`` ('Heat wave' relabelled to 'Heatwave') with the
        per-row columns ``Actual Hazard``, ``Detected Hazard``, ``Threshold``,
        ``True Positive``, ``True Negative``, ``False Positive``,
        ``False Negative`` and ``Accuracy`` (the latter is left at 0).

    Notes
    -----
    Per-row classification uses only the FIRST rule in ``thresholds_df`` for a
    given indicator; rows whose indicator has no rule keep all flags at 0.
    """
    merged_df = df_results.copy()
    merged_df['Hazard'] = merged_df['Hazard'].replace('Heat wave', 'Heatwave')

    merged_df['Actual Hazard'] = merged_df['Hazard']
    merged_df['Detected Hazard'] = 'None'
    merged_df['Threshold'] = np.nan
    merged_df['True Positive'] = 0   # hazard detected and it is the documented one
    merged_df['True Negative'] = 0   # nothing detected and the rule's hazard was not documented
    merged_df['False Positive'] = 0  # hazard detected but it is not the documented one
    merged_df['False Negative'] = 0  # nothing detected although the rule's hazard was documented
    merged_df['Accuracy'] = 0

    values = merged_df['Value'].to_numpy()
    actual = merged_df['Actual Hazard'].to_numpy(dtype=object)

    # Classify all rows of one indicator at once. Writes go through
    # `.loc[mask, column]` with positional boolean masks: chained
    # `frame[column].iloc[i] = ...` assignment is silently lost under pandas
    # Copy-on-Write, and masks also work when row labels are duplicated.
    for indicator in merged_df['Indicator'].unique():
        rules = thresholds_df[thresholds_df['Indicator'] == indicator]
        if len(rules) == 0:
            continue
        ttype = rules['Type'].iloc[0]
        threshold = rules['Threshold'].iloc[0]
        obs_hazard = rules['Hazard'].iloc[0]

        rows = (merged_df['Indicator'] == indicator).to_numpy()
        if ttype == 'Upper':
            triggered = values <= threshold
        elif ttype == 'Lower':
            triggered = values >= threshold
        else:
            triggered = np.zeros(len(merged_df), dtype=bool)

        merged_df.loc[rows, 'Threshold'] = threshold
        merged_df.loc[rows & triggered, 'Detected Hazard'] = obs_hazard

        detected = merged_df['Detected Hazard'].to_numpy(dtype=object)
        nothing_detected = detected == 'None'
        detected_is_actual = detected == actual
        actual_is_rule_hazard = (merged_df['Actual Hazard'] == obs_hazard).to_numpy()

        merged_df.loc[rows & detected_is_actual & ~nothing_detected, 'True Positive'] = 1
        merged_df.loc[rows & nothing_detected & ~actual_is_rule_hazard, 'True Negative'] = 1
        merged_df.loc[rows & ~nothing_detected & ~detected_is_actual, 'False Positive'] = 1
        merged_df.loc[rows & nothing_detected & actual_is_rule_hazard, 'False Negative'] = 1

    indicators = thresholds_df.Indicator.values
    hazards = thresholds_df.Hazard.values
    thres = thresholds_df.Threshold.values
    tps, tns, fps, fns = [], [], [], []
    accuracies, precisions, recalls, f1s = [], [], [], []
    for indicator, hazard in zip(indicators, hazards):
        # Score only the rule's own hazard against normal ('None') conditions;
        # observations documenting a different hazard are left out.
        in_scope = (merged_df['Indicator'] == indicator) & (
            (merged_df['Actual Hazard'] == hazard) | (merged_df['Actual Hazard'] == 'None')
        )
        scoped = merged_df[in_scope]
        tp = scoped['True Positive'].sum()
        tn = scoped['True Negative'].sum()
        fp = scoped['False Positive'].sum()
        fn = scoped['False Negative'].sum()
        tps.append(tp)
        tns.append(tn)
        fps.append(fp)
        fns.append(fn)
        # Correct decisions over all decisions.
        accuracies.append(_safe_ratio(tp + tn, tp + tn + fp + fn))
        precisions.append(_safe_ratio(tp, tp + fp))
        recalls.append(_safe_ratio(tp, tp + fn))
        f1s.append(_safe_ratio(2 * tp, 2 * tp + fp + fn))

    result = pd.DataFrame({
        'Indicator': indicators,
        'Hazard': hazards,
        'Threshold': thres,
        'F-Score': f1s,
        'Precision': precisions,
        'Recall': recalls,
        'True Positive': tps,
        'False Positive': fps,
        'True Negative': tns,
        'False Negative': fns,
        'Accuracy': accuracies,
    })

    return result, merged_df

In [None]:
import numpy as np
# Define the range of thresholds and corresponding hazards for each indicator.
# Each rule holds:
#   'range'  -> (lower bound, upper bound, step); the upper bound is included
#   'hazard' -> hazard that the indicator is used to detect
#   'type'   -> 'Upper' or 'Lower' (which side of the threshold flags the hazard)
# An indicator maps either to one rule (dict) or to several rules (list of dicts).


def _rule(value_range, hazard, ttype):
    """Build one threshold rule in the layout used throughout `threshold_ranges`."""
    return {'range': value_range, 'hazard': hazard, 'type': ttype}


def _standardized_index_rules():
    """Rules shared by the SPEI/SPI/SSI indices: 'Upper' for drought, 'Lower' for flood."""
    return [
        _rule((-3.5, 3.5, 0.5), 'Drought', 'Upper'),
        _rule((-3.5, 3.5, 0.5), 'Flood', 'Lower'),
    ]


threshold_ranges = {
    'HWI': _rule((-25, 10, 1), 'Heatwave', 'Lower'),
    'Max. Daily Temperature': _rule((1, 41, 2), 'Heatwave', 'Lower'),
    'SPEI12': _standardized_index_rules(),
    'SSI12': _standardized_index_rules(),
    'DFI': _rule((-4, 4, 0.5), 'Flood', 'Lower'),
    'Daily Discharge': _rule((0, 3000, 100), 'Flood', 'Lower'),
    '12-Month Precipitation': _rule((0, 1500, 100), 'Drought', 'Upper'),
    'Daily Precipitation': _rule((0, 100, 10), 'Flood', 'Lower'),
    'HWI-EDO': _rule((-25, 10, 1), 'Heatwave', 'Lower'),
    'Min. Daily Temperature': _rule((-11, 31, 2), 'Heatwave', 'Lower'),
    'SPEI3': _standardized_index_rules(),
    'SPEI6': _standardized_index_rules(),
    'SPI3': _standardized_index_rules(),
    'SPI6': _standardized_index_rules(),
    'SPI12': _standardized_index_rules(),
    'SSI3': _standardized_index_rules(),
    'SSI6': _standardized_index_rules(),
    '3-Month Precipitation': _rule((0, 500, 20), 'Drought', 'Upper'),
    '6-Month Precipitation': _rule((0, 800, 20), 'Drought', 'Upper'),
}

# Hand-picked threshold values that replace the evenly spaced grid for these
# indicators; their 'range' entry above is therefore not used.
threshold_overrides = {
    'Daily Precipitation': np.array([0, 1.00e-01, 3.00e-01, 6.00e-01, 1.00e+00, 1.50e+00, 2.30e+00,
                                     3.50e+00, 5.20e+00, 7.80e+00, 1.33e+01, 2.00e+02]),
    'Daily Discharge': np.array([0.000000e-00, 1.020000e-01, 4.299600e-01, 9.855300e-01,
                                 2.107380e+00, 5.000000e+00, 7.800000e+00, 1.258228e+01,
                                 2.300000e+01, 5.400000e+01, 2.000000e+03]),
}


def _expand_thresholds(indicator, rule):
    """Return the threshold values to evaluate for one rule of one indicator."""
    if indicator in threshold_overrides:
        print(f'{indicator} exception')
        return threshold_overrides[indicator]
    lower_bound, upper_bound, step_size = rule['range']
    # Stop one step past the upper bound so the bound itself is generated.
    return np.arange(lower_bound, upper_bound + step_size, step_size)


# Create a list to store the new threshold dataframe rows
new_threshold_rows = []

# Iterate over each indicator and generate one row per threshold value.
# Single-rule indicators are wrapped in a list so both layouts share one code
# path (and overrides apply no matter which layout an indicator uses).
for indicator, threshold_info in threshold_ranges.items():
    rules = threshold_info if isinstance(threshold_info, list) else [threshold_info]
    for rule in rules:
        for threshold in _expand_thresholds(indicator, rule):
            new_threshold_rows.append({
                'Indicator': indicator,
                'Threshold': threshold,
                'Hazard': rule['hazard'],
                'Type': rule['type'],
            })

# Create a new dataframe with the varying thresholds
varying_thresholds_df = pd.DataFrame(new_threshold_rows)


In [None]:
# Save the generated threshold grid to an Excel workbook.
varying_thresholds_df.to_excel(excel_writer='Threshold List.xlsx')

In [None]:
# Inspect the thresholds generated for the daily precipitation indicator.
varying_thresholds_df.loc[varying_thresholds_df['Indicator'] == 'Daily Precipitation']

In [None]:
from tqdm.notebook import tqdm
# Calculate combined results for all extracted data: every (indicator, threshold)
# rule is scored on its own against the observations of that indicator.
all_results = []  # one metrics frame per threshold row
all_merged = []   # one per-observation classification frame per threshold row
rows_by_indicator = {}  # observations per indicator, filtered once and reused

for i in tqdm(range(len(varying_thresholds_df))):
    # `iloc[[i]]` keeps the row as a one-row frame with its original column
    # dtypes (transposing a row Series would turn every column into object).
    row_df = varying_thresholds_df.iloc[[i]]
    indicator = row_df['Indicator'].iloc[0]
    if indicator not in rows_by_indicator:
        rows_by_indicator[indicator] = df_results[df_results['Indicator'] == indicator]
    indicator_df = rows_by_indicator[indicator]
    result, merged = performance_rates(indicator_df, row_df)
    all_results.append(result)  # Append each result DataFrame to the list
    all_merged.append(merged)  # Append each classified DataFrame to the list

# Concatenate all the metrics DataFrames in the list
combined_results = pd.concat(all_results)

# Concatenate all the per-observation DataFrames in the list
combined_merged = pd.concat(all_merged)

# Print the combined results
print(combined_results)


In [None]:
# Save the per-threshold evaluation metrics to an Excel workbook.
combined_results.to_excel(excel_writer='bc_evaluation_results_.xlsx')
