In [1]:
# Catchment to analyse: 'LinDyke' or 'WykeBeck'
catchment_name = 'LinDyke'
# Set of rainfall profiles to analyse; must be one of the keys of `methods_dict`
# ('Idealised', 'Observed' or 'SinglePeak_Scaled')
methods_key = 'Observed'

# Flag passed as `crop_or_not` to the binning helper functions.
# Setting it to True for Wyke Beck doesn't work (the land cover and results files have
# different values for out_meta so end up different sizes)
if catchment_name == 'WykeBeck':
    crop_or_not = False
elif catchment_name == 'LinDyke':
    crop_or_not = True
else:
    # Fail here rather than with a NameError on `crop_or_not` many cells later
    raise ValueError("Unsupported catchment_name: {!r} (expected 'LinDyke' or 'WykeBeck')".format(catchment_name))

# Optional sub-region, used in the FEH results path and the output file name.
# '' for no sub-region; other values used previously: 'Kippax', 'Garforth'
region = ''

In [2]:
# Directories (relative to this notebook) holding the model results and the land cover rasters
model_directory = f'../../../FloodModelling/{catchment_name}Models/Model_{methods_key}Profiles/'
landcover_directory = f'../../../FloodModelling/{catchment_name}Models/LandCoverData/'

# Whether to filter out values <0.1
remove_little_values = True

In [3]:
import pandas as pd
import numpy as np
import sys
import geopandas as gpd

sys.path.append("../")
from my_functions import *

# Per-catchment settings:
# (terrain string used in the results file names, catchment shapefile name, value for cell_size_in_m2)
catchment_settings = {
    'LinDyke': ("Resampled.Terrain", 'CatchmentLinDyke_exported.shp', 1),
    'WykeBeck': ("Terrain.wykeDEM", 'WykeBeckCatchment.shp', 4),
}

# Only define the variables for a known catchment (nothing is set otherwise)
if catchment_name in catchment_settings:
    catchment_name_str, catchment_shapefile, cell_size_in_m2 = catchment_settings[catchment_name]
    # Catchment boundary, read from the model directory
    catchment_gdf = gpd.read_file(model_directory + catchment_shapefile)

### Define the names of the method (in dictionary for different model runs)

In [4]:
# Names of the model runs belonging to each set of rainfall profiles.
# The first entry of each list is the single-peak profile the others are compared against.
methods_dict = {
    # Centred profile, then front-loaded (0.1-0.4) and back-loaded (0.6-0.9) profiles
    'Idealised': (['6h_sp_c_0.5']
                  + ['6h_sp_fl_0.{}'.format(tenth) for tenth in range(1, 5)]
                  + ['6h_sp_bl_0.{}'.format(tenth) for tenth in range(6, 10)]),
    # FEH single peak profile, then the 15 observed clusters
    'Observed': ['6h_feh_singlepeak'] + ['6h_c{}'.format(cluster) for cluster in range(1, 16)],
    # Single peak profile scaled up in steps of 5%
    'SinglePeak_Scaled': ['6h_sp_+{}%'.format(percent) for percent in range(0, 21, 5)],
}

In [5]:
# Names of the model runs to analyse, for the set of profiles chosen with `methods_key`
methods = methods_dict[methods_key]

### Get version of landcover array with just 'urban' and 'rural' categories

In [6]:
# Water landcover classification - 10 is water, 11 is everything else
notwater_fp = landcover_directory + 'LandCover_notwater_classification.tif'
with rasterio.open(notwater_fp, 'r') as notwater_src:
    # Keep the raster metadata as well as the first band of the raster
    out_meta = notwater_src.meta
    landcover_notwater = notwater_src.read()[0]
# 1D version of the array
landcover_notwater_flat = landcover_notwater.flatten()

# Urban landcover classification - 10 is urban, 1 is everything else
urban_fp = landcover_directory + 'LandCover_urban_and_suburban_classification.tif'
with rasterio.open(urban_fp, 'r') as urban_src:
    landcover_urban = urban_src.read()[0]
# 1D version of the array
landcover_urban_flat = landcover_urban.flatten()

### Find maximum intensity for each method and minute in which it occurs (to use in sorting results analysis)

In [7]:
# Column holding the rainfall values in each profile csv
rain_column = "Total net rain mm (Observed rainfall - 01/08/2022) - urbanised model"

# Peak value of each method, and the row label at which that peak occurs
maxs = []
min_of_maxs = []

for method in methods:
    # The FEH single peak profile is stored separately from the other profiles
    if method == '6h_feh_singlepeak':
        precip_fp = "../CreateSyntheticRainfallEvents/FEHProfiles/{}/6hr_100yrRP/PostLossRemoval/6hr_100yrRP_6.01h_1mintimestep.csv".format(catchment_name)
    else:
        precip_fp = "../CreateSyntheticRainfallEvents/{}Profiles/{}/6hr_100yrRP/PostLossRemoval/{}_urban.csv".format(methods_key,catchment_name, method)

    # Keep the first 360 rows and number them as minutes 1-360
    precip = pd.read_csv(precip_fp)[0:360].copy()
    precip['minute'] = range(1, 361)

    # Record the peak and where it occurs.
    # NOTE(review): idxmax() returns the row label of the peak, not the value of the 'minute'
    # column - presumably one less than the minute if the csv has a default index; confirm
    # which is intended.
    rain_values = precip[rain_column]
    maxs.append(rain_values.max())
    min_of_maxs.append(rain_values.idxmax())

### Create versions of the list of methods, ordered by max intensity and by the timing of the max intensity

In [8]:
# Methods ordered by when their peak occurs (earliest first)
peak_timings = pd.DataFrame({"min": min_of_maxs, 'method_name': methods})
short_ids_by_loading = peak_timings.sort_values('min')["method_name"].tolist()

# Methods ordered by the size of their peak (largest first)
peak_intensities = pd.DataFrame({"min": maxs, 'method_name': methods})
short_ids_by_intensity = peak_intensities.sort_values('min', ascending = False)["method_name"].tolist()

### Create dataframe of colours for each cluster (based on their loading)

In [9]:
# Build the colours dataframe with the helper matching the chosen set of profiles.
# Each helper is given the methods ordered by peak timing and the original list of methods.
# NOTE: colours_df is left undefined if methods_key is none of the three values below.
if methods_key == 'Observed':
    colours_df = create_colours_df_observed(short_ids_by_loading, methods)
elif methods_key == 'Idealised':
    colours_df = create_colours_df_idealised( short_ids_by_loading, methods)
elif methods_key == 'SinglePeak_Scaled':
    colours_df = create_colours_df_sp( short_ids_by_loading, methods)

### Create list of filepaths, formatted to be used for either depth or velocity

In [10]:
# One filepath template per method. The '{}' passed to format() leaves a placeholder in the
# path, so that the variable name can be filled in later (presumably 'Depth' or 'Velocity',
# by the helper functions the list is passed to).
fps = [model_directory + "{}/{} (Max).{}.tif".format(short_id, '{}', catchment_name_str)
       for short_id in methods]

# For the observed profiles, the FEH single peak results are stored under the FEH model
# directory (and region) rather than alongside the observed clusters
if methods_key == 'Observed':
    # Look the profile up by name rather than assuming it is first in the list
    feh_position = methods.index('6h_feh_singlepeak')
    fps[feh_position] = '../../../FloodModelling/{}Models/Model_FEHProfiles/6h_feh_singlepeak/{}/{} (Max).{}.tif'.format(catchment_name, region, '{}', catchment_name_str)

# <u> Flood extent </u>
To examine whether the rainfall's temporal distribution influences the total extent of flooding, the number of flooded cells and the total flooded area in km² (incl. only cells with depth >0.1m) are compared between the profile with a single peak, and the three methods for producing multi-peaked rainfall events.

### Create dataframes containing the (total/urban) flooded area in each depth/velocity bin

In [11]:
# Define breaks to split the depths/velocities on
breaks_depths = np.array([0, 0.3, 0.6, 1.2, 100])  
labels_depth = ['<=0.3m', '0.3-0.6m', '0.6-1.2m', '>1.2m']
breaks_velocity = np.array([0,0.25,0.5,2,100])
labels_velocity = ["<=0.25m/s", "0.25-0.5m/s", "0.5-2m/s", ">2m/s"]

In [12]:
# Binned counts and proportions for velocity and depth, with no landcover array supplied.
# NOTE(review): the third argument is '' here but True in the urban/not-water calls -
# presumably it switches landcover filtering on; confirm against my_functions.
velocity_counts, velocity_props = create_binned_counts_and_props(methods, fps, '', 'Velocity',catchment_gdf, crop_or_not=crop_or_not)
depth_counts, depth_props  = create_binned_counts_and_props(methods, fps, '', 'Depth',catchment_gdf, crop_or_not = crop_or_not)

In [13]:
# Binned counts and proportions for velocity and depth, this time also passing the
# flattened urban landcover classification
velocity_counts_urban, velocity_props_urban = create_binned_counts_and_props(methods, fps, True,'Velocity', catchment_gdf,
                                                         crop_or_not, landcover_urban_flat)
depth_counts_urban, depth_props_urban= create_binned_counts_and_props(methods, fps, True,'Depth',  catchment_gdf, 
                                                                     crop_or_not, landcover_urban_flat)

In [14]:
# Binned counts and proportions for velocity and depth, this time also passing the
# flattened not-water landcover classification
velocity_counts_notwater, velocity_props_notwater = create_binned_counts_and_props(methods, fps, True,'Velocity', catchment_gdf,
                                                                     crop_or_not, landcover_notwater_flat)
depth_counts_notwater, depth_props_notwater = create_binned_counts_and_props(methods, fps, True,'Depth',catchment_gdf, 
                                                                    crop_or_not, landcover_notwater_flat)

### Create dataframes containing the (total/urban) flooded area

In [17]:
# Totals for all cells, built from the velocity-binned counts and the cell size
totals_df = create_totals_df(velocity_counts, cell_size_in_m2)

In [16]:
# Totals for the urban cells and for the not-water cells, built in the same way as for all
# cells (from the velocity-binned counts and the cell size)
totals_df_urban = create_totals_df(velocity_counts_urban, cell_size_in_m2)  
totals_df_notwater = create_totals_df(velocity_counts_notwater, cell_size_in_m2)  

### Create dataframes containing the % diff in the flooded area between single peak and each other method  

In [18]:
# Single peak profile which the other methods are compared against, for each set of profiles
baseline_by_methods_key = {
    'Observed': '6h_feh_singlepeak',
    'Idealised': '6h_sp_c_0.5',
    'SinglePeak_Scaled': '6h_sp_+0%',
}
# Only set for a recognised methods_key (left untouched otherwise)
if methods_key in baseline_by_methods_key:
    column_for_comparison = baseline_by_methods_key[methods_key]

# Percentage differences from the single peak profile: all cells, urban cells, not-water cells
percent_diffs_df = find_percentage_diff(methods, column_for_comparison, totals_df, fps)
percent_diffs_df_urban = find_percentage_diff(methods, column_for_comparison, totals_df_urban, fps)
percent_diffs_df_notwater = find_percentage_diff(methods, column_for_comparison, totals_df_notwater, fps)

## Find number of cells with each hazard rating

In [19]:
# Hazard counts and proportions for all cells, urban cells and not-water cells.
# NOTE(review): the 2D landcover arrays are passed here, whereas the depth/velocity calls
# are given the flattened versions - confirm this is what the hazard helper expects.
hazard_counts, hazard_props = create_binned_counts_and_props_hazard(methods, fps, '', catchment_name_str,catchment_gdf, crop_or_not)
hazard_counts_urban, hazard_props_urban = create_binned_counts_and_props_hazard(methods, fps, 'Urban', catchment_name_str,
                                                                catchment_gdf, crop_or_not, landcover_urban)
hazard_counts_notwater, hazard_props_notwater = create_binned_counts_and_props_hazard(methods, fps, 'Notwater', 
                                                    catchment_name_str,catchment_gdf, crop_or_not, landcover_notwater)

## Find number of cells which have moved between hazard categories

In [45]:
# hazard_cat_changes = create_binned_counts_and_props_hazard_cat_change(methods, fps, catchment_name_str, bbox)
# hazard_cat_changes = create_binned_counts_and_props_hazard_cat_change(methods, fps, catchment_name_str, bbox)
# hazard_cat_changes = create_binned_counts_and_props_hazard_cat_change(methods, fps, catchment_name_str, bbox)

### Create a dataframe containing all the info on each of the scenarios

In [20]:
# Columns of the summary dataframe, assembled group by group (one row per method)

# Method name and the characteristics of its rainfall peak
summary_columns = {
    'Cluster_num': methods,
    "MaxRainfallIntensity": maxs,
    "MaxRainfallIntensityMinute": min_of_maxs,
}

# All cells
summary_columns.update({
    'FloodedArea': totals_df['FloodedArea'],
    '%Diff_FloodedArea_fromSP': percent_diffs_df['percent_diffs'],
    '%Diff_FloodedArea_fromSP_formatted': percent_diffs_df['percent_diff_formatted'],
    'Abs%Diff_FloodedArea_fromSP': percent_diffs_df['percent_diffs_abs'],
})

# Urban cells
summary_columns.update({
    'UrbanFloodedArea': totals_df_urban['FloodedArea'],
    '%Diff_UrbanFloodedArea_fromSP': percent_diffs_df_urban['percent_diffs'],
    '%Diff_UrbanFloodedArea_fromSP_formatted': percent_diffs_df_urban['percent_diff_formatted'],
    'Abs%Diff_UrbanFloodedArea_fromSP': percent_diffs_df_urban['percent_diffs_abs'],
})

# Not water cells
summary_columns.update({
    'NotwaterFloodedArea': totals_df_notwater['FloodedArea'],
    '%Diff_NotwaterFloodedArea_fromSP': percent_diffs_df_notwater['percent_diffs'],
    '%Diff_NotwaterFloodedArea_fromSP_formatted': percent_diffs_df_notwater['percent_diff_formatted'],
    'Abs%Diff_NotwaterFloodedArea_fromSP': percent_diffs_df_notwater['percent_diffs_abs'],
})

# Not currently included:
#'WorstCaseDepth_ncells': worst_case_method_depth['counts'].tolist(),
# 'WorstCaseVelocity_ncells': worst_case_method_velocity['counts'].tolist(),

# Colour assigned to each method
summary_columns['colour'] = colours_df['colour']

cluster_results = pd.DataFrame(summary_columns)

### Add the depth/velocity category breakdowns and hazard categories to this

In [21]:
# Dataframes to join onto cluster_results, and the suffix to add to the column names of each.
# The two lists are laid out in matching rows so that position i of `suffixes` can be checked
# against position i of `dfs`. (Previously `suffixes` held two extra '_countcells_notwater'
# entries, which shifted the suffixes applied to all six hazard dataframes.)
# NOTE: this changes the names of the hazard columns in the saved csv - anything reading the
# old (incorrect) hazard column names will need updating.
dfs = [velocity_props, depth_props, velocity_props_urban, depth_props_urban, velocity_props_notwater, depth_props_notwater,
       velocity_counts, depth_counts, velocity_counts_urban, depth_counts_urban, velocity_counts_notwater, depth_counts_notwater,
       hazard_counts, hazard_props, hazard_counts_urban, hazard_props_urban, hazard_counts_notwater, hazard_props_notwater]
suffixes = ['_propcells', '_propcells', '_propcells_urban', '_propcells_urban', '_propcells_notwater', '_propcells_notwater',
            '_countcells', '_countcells', '_countcells_urban', '_countcells_urban', '_countcells_notwater', '_countcells_notwater',
            '_countcells', '_propcells', '_countcells_urban', '_propcells_urban', '_countcells_notwater', '_propcells_notwater']

# Guard against the two lists drifting out of step again
if len(dfs) != len(suffixes):
    raise ValueError("Expected one suffix per dataframe, got {} dataframes and {} suffixes".format(len(dfs), len(suffixes)))

# NOTE: this cell adds columns to cluster_results, so re-run the cell which creates
# cluster_results before running this one again
for df, suffix in zip(dfs, suffixes):
    # Reformat the dataframe: transpose so that the original column names become the row index
    df = df.set_index('index').T
    # Add the correct suffix to the column names
    df = df.add_suffix(suffix)
    # Add Cluster_num column (taken from the row index) for joining
    df['Cluster_num'] = df.index
    # Join to cluster results dataframe
    cluster_results = pd.merge(cluster_results, df, how="outer", on='Cluster_num')
    
# cluster_results = pd.merge(cluster_results, hazard_cat_changes,  how="outer", on = 'Cluster_num')    

### Finding proportion of area/urban area flooded

In [48]:
# cluster_results['%floodedarea_urban'] = round(cluster_results['UrbanFloodedArea']/cluster_results['FloodedArea']*100,2)
# cluster_results['%_of_area_flooded'] =(cluster_results['FloodedArea']/29.589)*100
# cluster_results['%_of_urban_area_flooded'] =(cluster_results['UrbanFloodedArea']/7.987)*100
# # Add NAs for SP
# cluster_results['%Diff_FloodedArea_fromSP_formatted']=cluster_results['%Diff_FloodedArea_fromSP_formatted'].fillna('')
# cluster_results['%Diff_UrbanFloodedArea_fromSP_formatted']=cluster_results['%Diff_UrbanFloodedArea_fromSP_formatted'].fillna('')

## Summarise the number of cells in different depth/velocity categories

#### Get one dataframe containing the values for all methods, one row per cell per method 
Also including the water class variable in that cell

In [49]:
# each_cells_value = produce_df_of_cell_by_cell_values(model_directory, catchment_name_str, bbox, methods, landcover_water_flat, landcover_urban_flat)
# # rename for consistency
# each_cells_value['short_id'] = each_cells_value['short_id'].map({'6h_feh_singlepeak': 'FEH'}).fillna(each_cells_value['short_id'] )

### Rename the profile names

In [22]:
# Order in which the profiles should appear in the output (None = leave the order alone),
# and the display names which replace the raw profile ids (ids not listed are kept as they are)
display_order = None
display_names = {}

if methods_key == 'Idealised':
    display_names = {'6h_sp_c_0.5': 'C',
                     '6h_sp_fl_0.1': 'FL1', '6h_sp_fl_0.2': 'FL2', '6h_sp_fl_0.3': 'FL3', '6h_sp_fl_0.4': 'FL4',
                     '6h_sp_bl_0.6': 'BL6', '6h_sp_bl_0.7': 'BL7', '6h_sp_bl_0.8': 'BL8', '6h_sp_bl_0.9': 'BL9'}
    ### Reorder to C in middle
    display_order = ['6h_sp_fl_0.1', '6h_sp_fl_0.2', '6h_sp_fl_0.3', '6h_sp_fl_0.4', '6h_sp_c_0.5',
                     '6h_sp_bl_0.6', '6h_sp_bl_0.7', '6h_sp_bl_0.8', '6h_sp_bl_0.9']
elif methods_key == 'Observed':
    # NOTE: this rebinds `methods` to the display order, as the original cell did
    methods = ['6h_feh_singlepeak','6h_c1','6h_c8','6h_c15','6h_c3','6h_c11','6h_c10','6h_c9','6h_c13','6h_c6',
                 '6h_c2','6h_c12','6h_c14','6h_c4','6h_c7','6h_c5']
    display_order = methods
    display_names = {'6h_feh_singlepeak': 'FEH'}

if display_order is not None:
    # Rows are matched on their profile id rather than their position: the outer merges used
    # to build cluster_results can change the row order, so relabelling by position could put
    # the names on the wrong rows.
    position_of = dict(zip(display_order, range(len(display_order))))
    row_order = cluster_results['Cluster_num'].map(position_of).sort_values().index
    cluster_results = cluster_results.reindex(row_order).reset_index(drop=True)
    # Swap in the display names, keeping the original id where no display name is defined
    cluster_results['Cluster_num'] = cluster_results['Cluster_num'].map(display_names).fillna(cluster_results['Cluster_num'])

### Save to file

In [23]:
# Create path to the folder
path = "Outputs/Data/{}Profiles/{}/".format(methods_key, catchment_name)
# Create the folder (and any missing parent folders); does nothing if it already exists
os.makedirs(path, exist_ok=True)
# Save (the file name is prefixed with the region, when one is set)
cluster_results.to_csv(path + "{}allclusters_summary.csv".format(region), index=False)