In [1]:
import pandas as pd
import numpy as np

from my_functions_sp_scaled import *

### Get version of landcover array with just 'urban' and 'rural' categories

In [2]:
# Load the clipped land-cover raster; the second returned item is kept as `out_meta`
landcover, out_meta = prepare_rainfall_scenario_raster("../../ProcessLandCoverData/LandCover_clipped.tif", True)

# Collapse the land-cover classes into two codes: 10 = urban, 11 = non-urban.
# Classes 1 and 6 are the ones treated as urban.
landcover_mod = landcover
for urban_class in (1, 6):
    landcover_mod = np.where(landcover_mod == urban_class, 10, landcover_mod)

# Every remaining class becomes non-urban. Class 1 never matches here because it
# has already been recoded to 10 above.
for other_class in (1, 2, 3, 4, 5, 7, 8, 9):
    landcover_mod = np.where(landcover_mod == other_class, 11, landcover_mod)

### Define the names of the method (shorter and longer versions)

In [3]:
# Earlier naming scheme, kept for reference:
# short_ids = ['6h_feh_sp', '6h_sp', '6h_sp_+10']   
# methods = ['6h_feh_single-peak','6h_single-peak','6h_single-peak_+10%volume']  

# Identifiers of the rainfall scenarios analysed below. The first entry is handled
# separately from the rest when the rainfall profiles are read in.
methods = [
    '6h_feh_sp',
    '6h_sp_+0',
    '6h_sp_+05',
    '6h_sp_+10',
    '6h_sp_+20',
]

### Find maximum intensity for each method and minute in which it occurs (to use in sorting results analysis)

In [4]:
# Column of each profile CSV that is summarised below
net_rain_column = "Total net rain mm (Observed rainfall - 01/08/2022) - urbanised model"

# FEH profile: start both lists with its peak value and the row label of that peak
feh_precip=pd.read_csv("../../CreateSyntheticRainfallEvents/ReFH2_singlepeak/6hr_100yrRP/PostLossRemoval/6h_feh_singlepeak_urban.csv")
feh_net_rain = feh_precip[net_rain_column]
maxs = [feh_net_rain.max()]
min_of_maxs = [feh_net_rain.idxmax()]

# Remaining methods: same two statistics, read from the scaled single-peak profiles
for method in methods[1:]:
    precip=pd.read_csv("../../CreateSyntheticRainfallEvents/SinglePeak_Scaled/6hr_100yrRP/PostLossRemoval/{}_urban.csv".format(method))
    net_rain = precip[net_rain_column]
    maxs.append(net_rain.max())
    min_of_maxs.append(net_rain.idxmax())

### Create versions of the lists of methods, ordered by max intensity and by the timing of the max intensity

In [None]:
# short_ids_by_loading=  pd.DataFrame({"min": min_of_maxs, 'method_name': short_ids}).sort_values('min')["method_name"].tolist()
# short_ids_by_loading.remove('6h_feh_sp')
# short_ids_by_loading = ['6h_feh_sp']+short_ids_by_loading

# short_ids_by_intensity = pd.DataFrame({"min": maxs, 'method_name': short_ids}).sort_values('min', ascending = False)["method_name"].tolist()
# short_ids_by_intensity.remove('6h_feh_sp')
# short_ids_by_intensity = ['6h_feh_sp']+short_ids_by_intensity

### Create dataframe of colours for each cluster (based on their loading)

In [None]:
# colours_df = create_colours_df(short_ids_by_loading, short_ids)

### Create list of filepaths, formatted to be used for either depth or velocity

In [5]:
# Model output folders for the methods that have their own folder; every other
# method falls back to the scaled single-peak folder
model_directories = {
    "6h_feh_sp": '../../../../FloodModelling/Model_FEH_profiles/',
    "6h_sp": '../../../../FloodModelling/Model_SyntheticProfiles/',
}
scaled_model_directory = '../../../../FloodModelling/Model_SinglePeak_Scaled/'

fps = []
for method in methods:
    model_directory = model_directories.get(method, scaled_model_directory)
    # The second '{}' is left in the path as a placeholder that is filled in
    # later (the binning helpers call `fp.format(variable_name)`)
    fps.append(model_directory + "{}/{} (Max).Resampled.Terrain.tif".format(method, '{}'))

### Define breaks for categorising velocity and depth

In [6]:
# Bin labels and bin edges used when categorising depth rasters
labels_depth = ['<=0.3m', '0.3-0.6m', '0.6-1.2m', '>1.2m']
breaks_depths = np.array([0.0, 0.3, 0.6, 1.2, 100.0])

# Bin labels and bin edges used when categorising velocity rasters
labels_velocity = ["<=0.25m/s", "0.25-0.5m/s", "0.5-2m/s", ">2m/s"]
breaks_velocity = np.array([0.0, 0.25, 0.5, 2.0, 100.0])

# <u> Flood extent </u>
To examine whether the rainfall's temporal distribution influences the total extent of flooding, the number of flooded cells and the total flooded area in km² (including only cells with depth >0.1m) are compared between the profile with a single peak and the three methods for producing multi-peaked rainfall events.

### Create dataframes containing the (total/urban) flooded area in each depth/velocity bin

In [7]:
# (variable name, bin edges, bin labels) for each variable that is binned
velocity_spec = ('Velocity', breaks_velocity, labels_velocity)
depth_spec = ('Depth', breaks_depths, labels_depth)

# Counts and proportions per bin over all cells
velocity_counts, velocity_props = create_binned_counts_and_props(fps, *velocity_spec, remove_little_values)
depth_counts, depth_props = create_binned_counts_and_props(fps, *depth_spec, remove_little_values)

# The same breakdown from the urban variant of the helper, which also takes the recoded land-cover array
velocity_counts_urban, velocity_props_urban = create_binned_counts_and_props_urban(fps, *velocity_spec, remove_little_values, landcover_mod)
depth_counts_urban, depth_props_urban = create_binned_counts_and_props_urban(fps, *depth_spec, remove_little_values, landcover_mod)

### Create dataframes containing the (total/urban) flooded area

In [60]:
# Build the totals table twice: from the all-cell velocity counts, then from the urban-only counts
totals_df, totals_df_urban = map(create_totals_df, (velocity_counts, velocity_counts_urban))

In [61]:
# Display the totals table built from `velocity_counts` in the previous cell
totals_df

Unnamed: 0,short_id,FloodedArea
0,6h_feh_sp,1.701144
1,6h_sp_+0,1.746234
2,6h_sp_+05,1.519598
3,6h_sp_+10,1.587015
4,6h_sp_+20,1.744572


### Create dataframes containing the % diff in the flooded area between single peak and each other method  

In [20]:
# Percentage-difference tables, first for the all-cell totals and then for the urban-only totals
percent_diffs_df, percent_diffs_df_urban = (
    find_percentage_diff(totals, fps) for totals in (totals_df, totals_df_urban)
)

## Find number of cells in which each method leads to the worst flooding (depth/velocity)

In [22]:
# For depth and then velocity, find the number of flooded cells in which each
# method gives the worst flooding
worst_case_method_depth, worst_case_method_velocity = (
    find_worst_case_method(fps, methods, variable) for variable in ('Depth', 'Velocity')
)

In [None]:
def _tidy_worst_case(worst_case_df, method_order):
    """Return `worst_case_df` with exactly one row per method, in `method_order`.

    Parameters
    ----------
    worst_case_df : pd.DataFrame
        Table with a 'values' column holding method names (plus possibly the
        placeholder rows 'multiple matches' and 'nan').
    method_order : list of str
        Every method that must appear in the result, in the desired row order.

    Returns
    -------
    pd.DataFrame
        Placeholder rows removed, missing methods added (their other columns
        are NaN), rows ordered as in `method_order`, index reset to 0..n-1.
    """
    # Remove multiple matches and nan
    tidy = worst_case_df[~worst_case_df['values'].isin(['multiple matches', 'nan'])]
    # Outer merge adds a row for any method that is absent from the table
    tidy = pd.merge(tidy, pd.DataFrame({'values': method_order}), how="outer", on='values')
    # Reorder the rows by each method's position in `method_order`
    position = dict(zip(method_order, range(len(method_order))))
    tidy = tidy.reindex(tidy['values'].map(position).sort_values().index)
    return tidy.reset_index(drop=True)


# `short_ids` is no longer defined in this notebook (its assignment is commented
# out), so the list of method identifiers, `methods`, is used instead.
worst_case_method_depth = _tidy_worst_case(worst_case_method_depth, methods)
worst_case_method_velocity = _tidy_worst_case(worst_case_method_velocity, methods)

## Find number of cells with each hazard rating

In [40]:
def create_binned_counts_and_props(fps, variable_name, breaks, labels, remove_little_values):
    """Count the raster cells that fall in each value bin, for every scenario.

    Parameters
    ----------
    fps : list of str
        One filepath template per scenario, each containing a '{}' placeholder
        that is filled with `variable_name`. The scenario name is taken from
        the folder that directly contains the file.
    variable_name : str
        Text substituted into each template (e.g. 'Depth' or 'Velocity').
    breaks : array-like
        Bin edges passed to `pd.cut`; bins are closed on the left and open on
        the right. Values outside the edges (and NaN) are not counted.
    labels : list of str
        One label per bin, written to the 'index' column of both results.
    remove_little_values
        Forwarded unchanged as the second argument of
        `prepare_rainfall_scenario_raster`.

    Returns
    -------
    (counts_df, proportions_df) : tuple of pd.DataFrame
        Both have an 'index' column holding `labels` followed by one column per
        scenario. `counts_df` holds the number of cells per bin;
        `proportions_df` holds the same as a percentage of the binned cells,
        rounded to one decimal place.

    Raises
    ------
    ValueError
        Raised by pandas if `labels` does not have one entry per bin.
    """
    counts_by_method = {}
    proportions_by_method = {}

    # Loop through each rainfall scenario
    for fp in fps:
        # Read the raster and count how often each distinct value occurs
        raster = prepare_rainfall_scenario_raster(fp.format(variable_name), remove_little_values)[0]
        unique, counts = np.unique(raster, return_counts=True)

        # Attach to each distinct value the bin it falls within
        df = pd.DataFrame({'counts': counts,
                           'bins': pd.cut(unique, bins=breaks, right=False)})

        # Number of cells per bin. observed=False is stated explicitly so that
        # empty bins are kept (with a count of 0); otherwise the number of rows
        # could stop matching `labels`.
        binned_counts = df.groupby('bins', observed=False)['counts'].sum()

        # Cells in each bin as a percentage of all binned cells
        binned_props = ((binned_counts / binned_counts.sum()) * 100).round(1)

        # The scenario name is the folder holding the raster, whatever the path depth
        method_name = fp.split("/")[-2]
        counts_by_method[method_name] = binned_counts.to_numpy()
        proportions_by_method[method_name] = binned_props.to_numpy()

    counts_df = pd.DataFrame(counts_by_method)
    proportions_df = pd.DataFrame(proportions_by_method)

    # Put the bin labels in a leading 'index' column
    counts_df.insert(0, 'index', labels)
    proportions_df.insert(0, 'index', labels)

    return counts_df, proportions_df

def create_binned_counts_and_props_hazard(fps):
    """Count the cells in each hazard class, for every scenario.

    For each entry of `fps` the '{} (Max).Resampled.Terrain' part of the path
    is swapped for 'hazard_classified' and that raster is read. Counts are
    aligned across scenarios by the hazard class value itself, so a class
    missing from one scenario is reported as 0 for that scenario rather than
    shifting the other classes onto the wrong label.

    Parameters
    ----------
    fps : list of str
        One filepath template per scenario. The scenario name is taken from
        the folder that directly contains the file.

    Returns
    -------
    (counts_df, proportions_df) : tuple of pd.DataFrame
        Both have an 'index' column with the four hazard labels followed by one
        column per scenario. `counts_df` holds cell counts; `proportions_df`
        holds each count as a percentage of that scenario's non-NaN cells,
        rounded to one decimal place.

    Raises
    ------
    ValueError
        Raised by pandas if the scenarios together do not contain exactly four
        distinct hazard classes.

    Notes
    -----
    Reads the module-level name `remove_little_values`, which is not a
    parameter of this function.
    Assumes the ascending order of the class values corresponds to
    Low -> Moderate -> Significant -> Extreme -- TODO confirm against the
    code that writes 'hazard_classified'.
    """
    labels_hazard = ['Low hazard', 'Moderate hazard', 'Significant hazard', 'Extreme hazard']

    counts_by_method = {}
    proportions_by_method = {}

    for fp in fps:
        # Define filepath of the classified hazard raster
        hazard_fp = fp.replace('{} (Max).Resampled.Terrain', 'hazard_classified')
        # Read in data
        hazard = prepare_rainfall_scenario_raster(hazard_fp, remove_little_values)[0]

        # Count the cells per class, indexed by the class value, without the NaN entry
        unique, counts = np.unique(hazard, return_counts=True)
        class_counts = pd.Series(counts, index=unique)
        class_counts = class_counts[pd.notna(class_counts.index)]

        # The scenario name is the folder holding the raster, whatever the path depth
        method_name = hazard_fp.split("/")[-2]
        counts_by_method[method_name] = class_counts
        # Each class as a percentage of the scenario's counted cells
        proportions_by_method[method_name] = ((class_counts / class_counts.sum()) * 100).round(1)

    # Building the frames from Series aligns the scenarios on the class value;
    # a class absent from a scenario appears as NaN and is filled with zero
    counts_df = (pd.DataFrame(counts_by_method).sort_index()
                 .fillna(0).astype('int64').reset_index(drop=True))
    proportions_df = (pd.DataFrame(proportions_by_method).sort_index()
                      .fillna(0.0).reset_index(drop=True))

    # Put the hazard labels in a leading 'index' column
    counts_df.insert(0, 'index', labels_hazard)
    proportions_df.insert(0, 'index', labels_hazard)
    return counts_df, proportions_df

def create_binned_counts_and_props_hazard_cat_change(fps):
    """Summarise how many cells changed hazard category, for every scenario but the first.

    For each entry of `fps[1:]` the '{} (Max).Resampled.Terrain' part of the
    path is swapped for 'hazard_cat_difference' and that raster is read. Its
    values are counted and renamed to 'Hazard_3CatsLower' ... 'Hazard_SameCat'
    ... 'Hazard_3CatsHigher'.

    Parameters
    ----------
    fps : list of str
        One filepath template per scenario; the first entry is skipped. The
        scenario name is taken from the folder that directly contains the file.

    Returns
    -------
    pd.DataFrame
        One row per processed scenario with a 'Cluster_num' column (the
        scenario name), a '<category>_countcells' column per category (cell
        count) and a '<category>_propcells' column per category (percentage of
        the scenario's non-NaN cells, rounded to one decimal place). A category
        that does not occur in a scenario is NaN for that scenario.
    """
    replacement_dict = {-3.0: 'Hazard_3CatsLower', -2.0 : 'Hazard_2CatsLower', -1.0 : 'Hazard_1CatsLower', 0: 'Hazard_SameCat',
        1 : 'Hazard_1CatsHigher', 2: 'Hazard_2CatsHigher', 3: 'Hazard_3CatsHigher'}

    # Create dataframes to populate with values (one row per category-change value)
    counts_df = pd.DataFrame(columns = ["values"])
    proportions_df = pd.DataFrame(columns = ["values"])

    for fp in fps[1:]:
        # The scenario name is the folder holding the raster, whatever the path depth
        method_name = fp.split("/")[-2]

        # Read in the hazard category difference raster
        diff_fp = fp.replace('{} (Max).Resampled.Terrain', 'hazard_cat_difference')
        hazard = prepare_rainfall_scenario_raster(diff_fp, False)[0]

        # Count the cells per category-change value, dropping the NaN entry
        unique, counts = np.unique(hazard, return_counts=True)
        method_counts = pd.DataFrame({'values': unique, method_name: counts}).dropna()

        # Same table expressed as a percentage of the scenario's counted cells
        method_props = method_counts.copy()
        total_n_cells = method_counts[method_name].sum()
        method_props[method_name] = ((method_counts[method_name] / total_n_cells) * 100).round(1)

        # Outer merges keep every category seen in any scenario
        counts_df = counts_df.merge(method_counts, on = 'values', how = 'outer')
        proportions_df = proportions_df.merge(method_props, on = 'values', how = 'outer')

    # Order into ascending order of the category-change value
    counts_df = counts_df.sort_values(by='values')
    proportions_df = proportions_df.sort_values(by='values')

    # Join the two dataframes together and reformat: one row per scenario,
    # one suffixed column per category
    both_dfs = pd.DataFrame(columns = ["Cluster_num"])
    for suffix, table in (('_countcells', counts_df), ('_propcells', proportions_df)):
        table = table.replace({"values": replacement_dict}).rename(columns={'values': 'Cluster_num'})
        table = table.set_index('Cluster_num').T.add_suffix(suffix)
        table['Cluster_num'] = table.index
        both_dfs = pd.merge(both_dfs, table, how="outer", on = 'Cluster_num')

    return both_dfs


## Find number of cells which have moved between hazard categories

In [42]:
# Build the hazard-category-change table (the helper skips the first entry of `fps`)
hazard_cat_changes = create_binned_counts_and_props_hazard_cat_change(fps)
# Display it
hazard_cat_changes

Unnamed: 0,Hazard_3CatsLower_countcells,Hazard_2CatsLower_countcells,Hazard_1CatsLower_countcells,Hazard_SameCat_countcells,Hazard_1CatsHigher_countcells,Hazard_2CatsHigher_countcells,Hazard_3CatsHigher_countcells,Cluster_num,Hazard_3CatsLower_propcells,Hazard_2CatsLower_propcells,Hazard_1CatsLower_propcells,Hazard_SameCat_propcells,Hazard_1CatsHigher_propcells,Hazard_2CatsHigher_propcells,Hazard_3CatsHigher_propcells
0,,88.0,24810.0,1675916.0,110.0,4.0,,6h_sp,,0.0,1.5,98.5,0.0,0.0,
1,593.0,7578.0,104398.0,1136855.0,104585.0,28877.0,882.0,6h_sp_+5,0.0,0.5,7.5,82.2,7.6,2.1,0.1
2,474.0,13036.0,154260.0,1114926.0,91963.0,27653.0,872.0,6h_sp_+10,0.0,0.9,11.0,79.5,6.6,2.0,0.1
3,708.0,32443.0,257114.0,1032552.0,80828.0,26031.0,870.0,6h_sp_+20,0.0,2.3,18.0,72.2,5.7,1.8,0.1


### Create a dataframe containing all the info on each of the scenarios

In [47]:
# Display the method identifiers used as the 'Cluster_num' key in the next cell.
# NOTE(review): the stored output below lists '6h_sp' and '6h_sp_+5', which does not
# match the `methods` list assigned near the top of this notebook ('6h_sp_+0',
# '6h_sp_+05') -- re-run from a fresh kernel to confirm which naming is current.
methods

['6h_feh_sp', '6h_sp', '6h_sp_+5', '6h_sp_+10', '6h_sp_+20']

In [45]:
# One row per method: its name, the peak value of its rainfall profile and the row
# label at which that peak occurs. The three lists must be the same length,
# otherwise pandas raises a ValueError.
cluster_results = pd.DataFrame({'Cluster_num': methods}).assign(
    MaxRainfallIntensity=maxs,
    MaxRainfallIntensityMinute=min_of_maxs,
)
# Columns that used to be added here as well (currently disabled):
#    'TotalFloodedArea':totals_df['FloodedArea'],'%Diff_FloodedArea_fromSP':percent_diffs_df['percent_diffs'],
#     '%Diff_FloodedArea_fromSP_formatted':percent_diffs_df['percent_diff_formatted'],
#     'Abs%Diff_FloodedArea_fromSP':percent_diffs_df['percent_diffs_abs'],'UrbanFloodedArea':totals_df_urban['FloodedArea'],
#   '%Diff_UrbanFloodedArea_fromSP':percent_diffs_df_urban['percent_diffs'] ,
#   '%Diff_UrbanFloodedArea_fromSP_formatted':percent_diffs_df_urban['percent_diff_formatted'],
#     'Abs%Diff_UrbanFloodedArea_fromSP':percent_diffs_df_urban['percent_diffs_abs'], 
#     'WorstCaseDepth_ncells': worst_case_method_depth['counts'].tolist(),
#     'WorstCaseVelocity_ncells': worst_case_method_velocity['counts'].tolist()}) 

ValueError: All arrays must be of the same length

### Add the depth/velocity category breakdowns and hazard categories to this

In [None]:
# `hazard_counts` and `hazard_props` are not created by any earlier cell, so a
# fresh run would stop here with a NameError; build them before they are used.
hazard_counts, hazard_props = create_binned_counts_and_props_hazard(fps)

# Tables to fold into the summary, and the suffix appended to each table's
# category labels (the two lists are matched by position)
dfs = [velocity_props, depth_props, velocity_props_urban, depth_props_urban, velocity_counts, depth_counts,
          velocity_counts_urban, depth_counts_urban,hazard_counts, hazard_props]
suffixes = ['_propcells', '_propcells','_propcells_urban','_propcells_urban','_countcells','_countcells','_countcells_urban','_countcells_urban',
'_numcells', '_propcells']

for df, suffix in zip(dfs, suffixes):
    # Reformat the dataframe: one row per method, one column per category
    df = df.set_index('index').T
    # Add the correct suffix to the column names
    df = df.add_suffix(suffix)
    # Add Cluster_num column for joining
    df['Cluster_num'] = df.index
    # Join to cluster results dataframe
    cluster_results = pd.merge(cluster_results,  df, how="outer", on = 'Cluster_num')

# Finally add the hazard-category-change columns
cluster_results = pd.merge(cluster_results, hazard_cat_changes,  how="outer", on = 'Cluster_num')

### Save to file

In [None]:
# Write the combined per-method summary table to CSV, without the row index.
# NOTE(review): presumably the relative "Data/" folder must already exist -- confirm.
cluster_results.to_csv("Data/allclusters_summary.csv", index=False)

### Delete tiff files (as these aren't used again and take up a lot of space)

In [None]:
# for method in short_ids:
#     print(method)
#     if method != '6h_feh_sp':
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/hazard_cat_difference.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Depth_difffromsinglepeak_classified.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Depth_difffromsinglepeak_posneg.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Velocity_difffromsinglepeak_classified.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Velocity_difffromsinglepeak_posneg.tif".format(method)) 
        
#     os.remove("../../../../FloodModelling/MeganModel_New/{}/Depth_classified.tif".format(method)) 
#     os.remove("../../../../FloodModelling/MeganModel_New/{}/hazard_classified.tif".format(method)) 
#     os.remove("../../../../FloodModelling/MeganModel_New/{}/Velocity_classified.tif".format(method)) 