In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from my_functions_new import *
from my_plotting_functions import *

### Get version of landcover array with just 'urban' and 'rural' categories

In [2]:
# Load the clipped land-cover raster together with its metadata
landcover, out_meta = prepare_rainfall_scenario_raster(model_directory + "LandCover_clipped.tif", True)
# Collapse the land-cover classes into two groups:
#   classes 1 and 6          -> 10 (urban)
#   classes 2-5 and 7-9      -> 11 (non-urban)
# Any other value in the raster is passed through unchanged.
urban_mask = np.isin(landcover, [1, 6])
non_urban_mask = np.isin(landcover, [2, 3, 4, 5, 7, 8, 9])
landcover_mod = np.where(urban_mask, 10, np.where(non_urban_mask, 11, landcover))

### Define the names of the methods (shorter and longer versions)

In [3]:
# Number of the last observed-profile cluster (clusters are numbered from 1)
n_clusters = 15

# Short scenario ids: the FEH single-peak profile first, then one id per cluster
short_ids = ['6h_feh_sp'] + ['6h_c{}'.format(num) for num in range(1, n_clusters + 1)]
# Longer display names, in the same order as short_ids
methods = ['6h_single-peak'] + ['Cluster{}'.format(num) for num in range(1, n_clusters + 1)]

### Find maximum intensity for each method and minute in which it occurs (to use in sorting results analysis)

In [4]:
# Column holding the net rainfall values in each post-loss-removal CSV
net_rain_col = "Total net rain mm (Observed rainfall - 01/08/2022) - urbanised model"

# FEH single-peak profile goes first: its peak value and the row label at which the peak occurs
feh_precip = pd.read_csv("../../CreateSyntheticRainfallEvents/ReFH2_singlepeak/6hr_100yrRP/PostLossRemoval/6h_feh_singlepeak.csv")
maxs = [feh_precip[net_rain_col].max()]
min_of_maxs = [feh_precip[net_rain_col].idxmax()]

# Then the observed cluster profiles, in cluster order (1 to 15)
observed_profile_fp = "../../CreateSyntheticRainfallEvents/ObservedProfiles/6hr_100yrRP/PostLossRemoval/cluster{}_urban_summer.csv"
for cluster_num in range(1, 16):
    precip = pd.read_csv(observed_profile_fp.format(cluster_num))
    net_rain = precip[net_rain_col]
    maxs.append(net_rain.max())
    # idxmax gives the row label of the peak; the notebook treats it as the minute of the peak
    min_of_maxs.append(net_rain.idxmax())

### Create versions of the lists of methods, ordered by max intensity and by the timing of the max intensity

In [5]:
# Order the scenario ids by the row label at which their peak intensity occurs (earliest first)
loading_table = pd.DataFrame({"min": min_of_maxs, 'cluster_num': short_ids})
short_ids_by_loading = loading_table.sort_values('min')["cluster_num"].tolist()
# The FEH single-peak scenario is always moved to the front, wherever it sorted to
short_ids_by_loading.remove('6h_feh_sp')
short_ids_by_loading.insert(0, '6h_feh_sp')

# Order the scenario ids by peak intensity (highest first).
# Note that the 'min' column here holds the peak intensities, not a minute.
intensity_table = pd.DataFrame({"min": maxs, 'cluster_num': short_ids})
short_ids_by_intensity = intensity_table.sort_values('min', ascending = False)["cluster_num"].tolist()
short_ids_by_intensity.remove('6h_feh_sp')
short_ids_by_intensity.insert(0, '6h_feh_sp')

### Create dataframe of colours for each cluster (based on their loading)

In [6]:
# Build the per-scenario colour table. The scenario ids are passed twice: once in
# loading order and once in their original order.
# NOTE(review): presumably colours are assigned by position in the loading order and
# the result is returned in the original short_ids order -- confirm in create_colours_df.
colours_df = create_colours_df(short_ids_by_loading, short_ids)

### Create list of filepaths, formatted to be used for either depth or velocity

In [7]:
# One path template per scenario. The scenario folder is filled in here; the second
# placeholder is deliberately re-emitted as a literal '{}' so that the variable name
# (depth or velocity) can be substituted later with another .format() call.
raster_name_template = "{}/{} (Max).Resampled.Terrain.tif"
fps = [model_directory + raster_name_template.format(short_id, '{}') for short_id in short_ids]

### Define breaks for categorising velocity and depth

In [8]:
# Depth: bin edges and the label of each bin (one label per pair of consecutive edges).
# The final edge of 100 sits far above the last labelled threshold, so the top bin is
# labelled as open-ended ('>1.2m').
breaks_depths = np.array([0.0, 0.3, 0.6, 1.2, 100.0])
labels_depth = ['<=0.3m', '0.3-0.6m', '0.6-1.2m', '>1.2m']

# Velocity: same layout, with the top bin labelled as open-ended ('>2m/s')
breaks_velocity = np.array([0.0, 0.25, 0.5, 2.0, 100.0])
labels_velocity = ["<=0.25m/s", "0.25-0.5m/s", "0.5-2m/s", ">2m/s"]

# <u> Flood extent </u>
To examine whether the rainfall's temporal distribution influences the total extent of flooding, the number of flooded cells and the total flooded area in km² (including only cells with depth >0.1m) are compared between the single-peak profile and each of the 15 observed cluster profiles.

### Create dataframes containing the (total/urban) flooded area in each depth/velocity bin

In [9]:
# Bin the velocity and depth results of every scenario in fps using the break arrays and
# labels defined earlier; each call is unpacked into a counts table and a proportions table.
# NOTE(review): remove_little_values is not defined in this notebook -- presumably it is
# exported by one of the star-imported helper modules; confirm.
velocity_counts, velocity_props = create_binned_counts_and_props(fps, 'Velocity', breaks_velocity, labels_velocity, remove_little_values)
depth_counts, depth_props = create_binned_counts_and_props(fps, 'Depth', breaks_depths, labels_depth, remove_little_values)

# Same breakdown, additionally passing the reclassified land-cover array.
# NOTE(review): presumably this restricts the counts to urban cells (value 10 in
# landcover_mod) -- confirm in create_binned_counts_and_props_urban.
velocity_counts_urban, velocity_props_urban = create_binned_counts_and_props_urban(fps, 'Velocity', breaks_velocity, labels_velocity, remove_little_values, landcover_mod)
depth_counts_urban, depth_props_urban = create_binned_counts_and_props_urban(fps, 'Depth', breaks_depths, labels_depth, remove_little_values, landcover_mod)

### Create dataframes containing the (total/urban) flooded area

In [10]:
# Total flooded area per scenario, for all cells and for the urban subset.
# NOTE(review): both totals are derived from the velocity bin counts only; presumably the
# depth bin counts would give the same totals -- confirm.
totals_df = create_totals_df(velocity_counts)
totals_df_urban = create_totals_df(velocity_counts_urban)      

### Create dataframes containing the % diff in the flooded area between single peak and each other method  

In [11]:
# Percentage difference in flooded area for each scenario, for all cells and for the
# urban subset.
# NOTE(review): presumably the differences are taken relative to the first scenario
# (the single-peak profile) -- confirm in find_percentage_diff.
percent_diffs_df = find_percentage_diff (totals_df, fps) 
percent_diffs_df_urban = find_percentage_diff (totals_df_urban, fps)   

## Find number of cells in which each method leads to the worst flooding (depth/velocity)

In [None]:
# Find the number of flooded cells with the worst flooding for each method, separately
# for depth and for velocity. Later cells read the 'values' and 'counts' columns of the
# returned tables.
# NOTE(review): "worst" presumably means the highest value across scenarios in a cell,
# with ties reported as 'multiple matches' -- confirm in find_worst_case_method.
worst_case_method_depth = find_worst_case_method(fps, short_ids, 'Depth')
worst_case_method_velocity = find_worst_case_method(fps, short_ids,  'Velocity') 

In [None]:
def _tidy_worst_case_counts(worst_case_df):
    """Return the worst-case table with one row per scenario id, in short_ids order.

    Rows whose 'values' entry is 'multiple matches' or 'nan' are dropped, scenario ids
    missing from the table are added through an outer merge (their other columns are
    left as NaN), and the rows are then arranged to follow the order of short_ids with
    a fresh 0..n-1 index.
    """
    unwanted = worst_case_df['values'].isin(['multiple matches','nan'])
    kept = worst_case_df[~unwanted]
    # Outer merge on the shared 'values' column adds the scenarios that never came out worst
    with_all_ids = pd.merge(kept, pd.DataFrame({'values': short_ids}), how="outer")
    # Position of every scenario id within short_ids, used as the sort key
    position_in_short_ids = dict(zip(short_ids, range(len(short_ids))))
    row_order = with_all_ids['values'].map(position_in_short_ids).sort_values().index
    return with_all_ids.reindex(row_order).reset_index(drop=True)


worst_case_method_depth = _tidy_worst_case_counts(worst_case_method_depth)
worst_case_method_velocity = _tidy_worst_case_counts(worst_case_method_velocity)

In [None]:
# fig, axs = plt.subplots(nrows=1, ncols=2, figsize = (20,7))
# worst_case_method_depth.plot(ax= axs[0], kind ='bar',width=  0.9, rot =45, ylabel = 'Number of cells')      
# worst_case_method_velocity.plot(ax= axs[1], kind ='bar',width=  0.9, rot =45, ylabel = 'Number of cells')  ;

## Find number of cells with each hazard rating

In [47]:
# Build the hazard-rating breakdown for every scenario in fps; the result is unpacked
# into a counts table and a proportions table.
# NOTE(review): the layout of these tables (e.g. whether they already carry a
# 'Cluster_num' column) is not visible here -- confirm in create_binned_counts_and_props_hazard.
hazard_counts, hazard_props = create_binned_counts_and_props_hazard(fps)
# Reformat 
# hazard_counts = hazard_counts.set_index('index').T
# hazard_counts = hazard_counts.add_suffix('_numcells')
# hazard_counts['Cluster_num'] = hazard_counts.index

# hazard_props = hazard_props.set_index('index').T
# hazard_props = hazard_props.add_suffix('_propcells')
# hazard_props['Cluster_num'] = hazard_props.index

## Find number of cells which have moved between hazard categories

In [None]:
# Table of hazard-category changes per scenario; it is later merged into cluster_results
# on 'Cluster_num'.
# NOTE(review): presumably the change is measured against the single-peak scenario --
# confirm in create_binned_counts_and_props_hazard_cat_change.
hazard_cat_changes = create_binned_counts_and_props_hazard_cat_change(fps)

### Create a dataframe containing all the info on each of the scenarios

In [None]:
# Assemble one row per scenario. The list-valued columns are placed positionally, while
# the Series-valued columns are aligned on their index.
# NOTE(review): this assumes every Series below carries a default 0..n-1 index in
# short_ids order -- confirm against the helper functions that produce them.
summary_columns = {
    'Cluster_num': short_ids,
    # Rainfall characteristics
    "MaxRainfallIntensity": maxs,
    "MaxRainfallIntensityMinute": min_of_maxs,
    # Flooded area over all cells
    'TotalFloodedArea': totals_df['FloodedArea'],
    '%Diff_FloodedArea_fromSP': percent_diffs_df['percent_diffs'],
    '%Diff_FloodedArea_fromSP_formatted': percent_diffs_df['percent_diff_formatted'],
    'Abs%Diff_FloodedArea_fromSP': percent_diffs_df['percent_diffs_abs'],
    # Flooded area over urban cells
    'UrbanFloodedArea': totals_df_urban['FloodedArea'],
    '%Diff_UrbanFloodedArea_fromSP': percent_diffs_df_urban['percent_diffs'],
    '%Diff_UrbanFloodedArea_fromSP_formatted': percent_diffs_df_urban['percent_diff_formatted'],
    'Abs%Diff_UrbanFloodedArea_fromSP': percent_diffs_df_urban['percent_diffs_abs'],
    # Number of cells in which each scenario is the worst case
    'WorstCaseDepth_ncells': worst_case_method_depth['counts'].tolist(),
    'WorstCaseVelocity_ncells': worst_case_method_velocity['counts'].tolist(),
    # Plotting colour of each scenario
    'colour': colours_df['colour'],
}
cluster_results = pd.DataFrame(summary_columns)

### Add the hazard categories to this

In [None]:
# Attach the hazard tables one at a time with an outer join on the 'Cluster_num' key.
# This cell re-binds cluster_results, so running it again without first rebuilding
# cluster_results merges the same tables a second time (duplicated columns get
# pandas' _x/_y suffixes).
# NOTE(review): the reformatting that would add a 'Cluster_num' column to hazard_counts
# and hazard_props is commented out where those tables are created -- confirm that the
# helper already returns that column, otherwise these merges cannot find the key.
for hazard_table in (hazard_cat_changes, hazard_counts, hazard_props):
    cluster_results = cluster_results.merge(hazard_table, how="outer", on='Cluster_num')

### Add the depth/velocity category breakdowns to this

In [48]:
# Tables intended to be merged into cluster_results, paired position-by-position with
# the column suffix listed in `labels`. Nothing in the live code of this cell consumes
# either list yet; the merge loops below are all commented out.
dfs = [velocity_props, depth_props, velocity_props_urban, depth_props_urban,velocity_counts, depth_counts,
          velocity_counts_urban, depth_counts_urban,hazard_counts, hazard_props]
# NOTE(review): the last two suffixes are both '_numcells' although the last table is
# hazard_props -- possibly a copy-paste slip for a proportion suffix; confirm before
# these lists are put to use.
labels = ['_propcells', '_propcells','_propcells_urban','_propcells_urban','_countcells','_countcells','_countcells_urban','_countcells_urban',
'_numcells', '_numcells']

# Display the number of tables collected above
len(dfs)
# for df in dfs
    
#     df = df.set_index('index').T
#     df = df.add_suffix('_countcells') 
#     df['Cluster_num'] = df.index
#     cluster_results = pd.merge(cluster_results,  df, how="outer", on = 'Cluster_num')


    
# #####################
# for df in [velocity_props, depth_props ]:
#     df = df.set_index('index').T
#     df = df.add_suffix('_propcells')
#     df['Cluster_num'] = df.index
#     cluster_results = pd.merge(cluster_results,  df, how="outer", on = 'Cluster_num')

# for df in [velocity_props_urban, depth_props_urban ]:
#     df = df.set_index('index').T
#     df = df.add_suffix('_propcells_urban')
#     df['Cluster_num'] = df.index
#     cluster_results = pd.merge(cluster_results,  df, how="outer", on = 'Cluster_num')    
    
# for df in [velocity_counts, depth_counts]:
#     df = df.set_index('index').T
#     df = df.add_suffix('_countcells') 
#     df['Cluster_num'] = df.index
#     cluster_results = pd.merge(cluster_results,  df, how="outer", on = 'Cluster_num')
    
# for df in [velocity_counts_urban, depth_counts_urban]:
#     df = df.set_index('index').T
#     df = df.add_suffix('_countcells_urban') 
#     df['Cluster_num'] = df.index
#     cluster_results = pd.merge(cluster_results,  df, how="outer", on = 'Cluster_num')    
    

    
# for df in enumerate([hazard_counts, hazard_props]):
#     df = df.set_index('index').T
#     if num ==0:
#         df = df.add_suffix('_numcells')
#     else:
#         df = df.add_suffix('_numcells')
#     df['Cluster_num'] = df.index
#     cluster_results = pd.merge(cluster_results,  df, how="outer", on = 'Cluster_num')        

NameError: name 'velocity_props' is not defined

### Save to file

In [None]:
# Write the combined per-scenario summary to "allclusters_summary.csv" (a path relative
# to the current working directory), leaving out the DataFrame's row index
cluster_results.to_csv("allclusters_summary.csv", index=False)

In [None]:
# # Create dataframes to populate with values
# overall_df = pd.DataFrame(columns = ["values"])

# for fp in fps[1:]:
#     # Define name of method
#     method_name = re.search('{}(.*)/'.format(model_directory), fp).group(1)
    
#     # Read in data
#     fp = '../../../../FloodModelling/MeganModel_New/{}/hazard_cat_difference.tif'.format(fp.split('New/')[1].split('/{}')[0])
#     hazard = prepare_rainfall_scenario_raster(fp, False)[0]
#     # Get a dataframe of the number of each hazard change category
#     unique, counts = np.unique(hazard, return_counts=True)
#     df = pd.DataFrame({'values': unique, '{}_counts'.format(method_name):counts})
#     # Remove NA columns
#     df = df.dropna()

#     # Find the total number of cells
#     total_n_cells = df ['{}_counts'.format(method_name)].sum()
#     # Find the number of cells in each group as a proportion of the total
#     df['{}_Proportion'.format(method_name)] = round((df['{}_counts'.format(method_name)]/total_n_cells) *100,1)
    
#     # Add to dataframe of all scenario results
#     overall_df= overall_df.merge(df[['values', '{}_Proportion'.format(method_name), '{}_counts'.format(method_name)]], on = 'values', how = 'outer')
