In [2]:
catchment_name = 'WykeBeck' #LinDyke
methods_key ='Observed'
region = '' # 'Kippax' #'' # 'Garforth'

In [4]:
# Set up ink to model directory and read in catchment shapefile
model_directory = '../../../FloodModelling/{}Models/Model_{}Profiles/'.format(catchment_name, methods_key)
landcover_directory = '../../../FloodModelling/{}Models/LandCoverData/'.format(catchment_name)

# Define whether to filter out values <0.1
remove_little_values = True

In [6]:
import pandas as pd
import numpy as np
import sys
import geopandas as gpd

sys.path.append("../")
from my_functions import *

# Specify strings relating to catchment
if catchment_name == 'LinDyke':
    catchment_name_str = "Resampled.Terrain" 
    minx, miny, maxx, maxy = 437000,  426500,  445500, 434300
    catchment_gdf = gpd.read_file(model_directory + 'CatchmentLinDyke_exported.shp')
    cell_size_in_m2 = 1
elif catchment_name == 'WykeBeck':
    catchment_name_str = "Terrain.wykeDEM" 
    minx, miny, maxx, maxy = 430004,  429978, 438660, 440996 
    cell_size_in_m2 = 4
    catchment_gdf = gpd.read_file(model_directory + 'WykeBeckCatchment.shp')
    
# Create a bounding box (this is used in preparing the rasters)
bbox = box(minx, miny, maxx, maxy)   

In [7]:
def create_binned_counts_and_props(methods, fps, filter_by_land_cover, variable_name, bbox,catchment_gdf, 
                                   landcover_data=False, remove_little_values = True,):

    if variable_name =='Depth':
        breaks = np.array([0, 0.3, 0.6, 1.2, 100])  
        labels = ['<=0.3m', '0.3-0.6m', '0.6-1.2m', '>1.2m']
    elif variable_name =='Velocity':
        breaks = np.array([0,0.25,0.5,2,100])
        labels = ["<=0.25m/s", "0.25-0.5m/s", "0.5-2m/s", ">2m/s"]
        
    # Create dataframes to populate with values
    counts_df = pd.DataFrame()
    proportions_df = pd.DataFrame()        

    # Loop through each rainfall scenario
    # Get the raster containing its values, and count the number of each unique value, and construct into a dataframe
    for num, fp in enumerate(fps) :
        # Classify depth/velocity rasters into depth/velocity bins
        #raster = prepare_rainfall_scenario_raster(fp.format(variable_name), bbox, remove_little_values)[0]

        with rasterio.open(fp.format(variable_name)) as src:
            catchment_gdf=catchment_gdf.to_crs(src.crs)
            # print(Vector.crs)
            out_image, out_transform=mask(src,catchment_gdf.geometry,crop=True)
            out_meta=src.meta.copy() # copy the metadata of the source DEM
            raster = out_image[0]
            raster[raster == -9999.] = np.nan
            
            if remove_little_values == True:
                if "Depth" in fp:
                    raster = np.where(raster <0.1, np.nan, raster)    
                else:
                    with rasterio.open(fp.format('Depth')) as src:
                        out_image, out_transform=mask(src,catchment_gdf.geometry,crop=True)
                        out_meta=src.meta.copy() # copy the metadata of the source DEM
                        depth_raster = out_image[0]
                        depth_raster[depth_raster == -9999.] = np.nan
                        raster = np.where(depth_raster <0.1, np.nan, raster)            
            
        # If analysing all cells
        if filter_by_land_cover == '':
            unique, counts = np.unique(raster, return_counts=True)
            df = pd.DataFrame({'values': unique, 'value':counts})

            # Add a new column specifying the bin which each value falls within
            df['bins']= pd.cut(unique, bins=breaks, right=False)

        # If just analysing urban cells
        elif filter_by_land_cover == True:
            raster_and_landcover = pd.DataFrame({'landcovercategory':  landcover_data, 'value': raster.flatten()})
            # Get just the relevant rows
            df = raster_and_landcover[raster_and_landcover['landcovercategory']==10].copy()  
            # Add a column assigning a bin based on the depth/velocity value
            df['bins']= pd.cut(df['value'], bins=breaks, right=False)


        # Create a new dataframe showing the number of cells in each of the bins
        groups = df.groupby(['bins']).count()
        groups  = groups.reset_index()
        groups.rename(columns={"value": "Count"},inplace=True)

        # Find the total number of cells
        total_n_cells = groups['Count'].sum()
        # Find the number of cells in each group as a proportion of the total
        groups['Proportion'] = round((groups['Count']/total_n_cells) *100,1)

        # Add values to dataframes
        method_name = methods[num]
        counts_df[method_name] = groups['Count']
        proportions_df[method_name] = groups['Proportion']

    # Reset index to show the groups
    counts_df.reset_index(inplace=True)
    proportions_df.reset_index(inplace=True)

    # Set index values
    counts_df['index'] = labels
    proportions_df['index'] = labels
    
    return counts_df,proportions_df


def create_binned_counts_and_props_hazard(methods, fps, filter_by_land_cover, catchment_name_str, catchment_gdf,bbox, landcover_data=False):

    # Create dataframes to populate with values
    counts_df = pd.DataFrame()
    proportions_df = pd.DataFrame()      

    for num, fp in enumerate(fps):
        # Define filepath
        fp = fp.replace('{} (Max).{}'.format({}, catchment_name_str),'hazard_classified')
        # Read in data
        #hazard = prepare_rainfall_scenario_raster(fp, bbox, remove_little_values)[0]
        
        with rasterio.open(fp) as src:
            catchment_gdf=catchment_gdf.to_crs(src.crs)
            # print(Vector.crs)
            out_image, out_transform=mask(src,catchment_gdf.geometry,crop=True)
            out_meta=src.meta.copy() # copy the metadata of the source DEM
            hazard = out_image[0]
            hazard[hazard == -9999.] = np.nan
                
        # If fdiltering by land cover, then do additional stage of filtering out only cells in that category
        if filter_by_land_cover != '':
            # Get dataframe of hazard values, alongside land cover class
            hazard_and_landcover = pd.DataFrame({'landcovercategory':  landcover_data.flatten(), 'counts': hazard.flatten()})
            # Keep just the rows in the relevant landcoverclass
            df = hazard_and_landcover[hazard_and_landcover['landcovercategory']==10].copy()  
            # remove the NA values (i.e. where there is no flooding)
            df=df[df.counts.notnull()]
            # Convert the counts back into an array
            hazard = np.array(df['counts'])
       
        # Count number of cells in each hazard category
        unique, counts = np.unique(hazard, return_counts=True)
        df = pd.DataFrame({'values': unique, 'counts':counts})
        # Remove Nan values
        df = df.dropna()
        
        # Find the total number of cells
        total_n_cells = df ['counts'].sum()
        # Find the number of cells in each group as a proportion of the total
        df['Proportion'] = round((df['counts']/total_n_cells) *100,1)
        
        # Add values to dataframes
        method_name = methods[num]
        counts_df[method_name] = df['counts']
        proportions_df[method_name] = df['Proportion']

    # Reset index to show the groups
    counts_df.reset_index(inplace=True)
    proportions_df.reset_index(inplace=True)

    # Set index values
    labels_hazard = ['Low hazard', 'Moderate hazard', 'Significant hazard', 'Extreme hazard']
    counts_df['index'] = labels_hazard
    proportions_df['index'] = labels_hazard
    return counts_df, proportions_df

### Define the names of the method (in dictionary for different model runs)

In [8]:
methods_dict = {'Idealised': [ '6h_sp_c_0.5','6h_sp_fl_0.1', '6h_sp_fl_0.2', '6h_sp_fl_0.3', '6h_sp_fl_0.4',
                    '6h_sp_bl_0.6','6h_sp_bl_0.7','6h_sp_bl_0.8','6h_sp_bl_0.9'],
                'Observed':['6h_feh_singlepeak', '6h_c1','6h_c2','6h_c3','6h_c4', '6h_c5', '6h_c6','6h_c7',
             '6h_c8','6h_c9','6h_c10', '6h_c11', '6h_c12','6h_c13','6h_c14','6h_c15'], 
               'SinglePeak_Scaled':['6h_sp_+0%','6h_sp_+5%','6h_sp_+10%','6h_sp_+15%','6h_sp_+20%']}

In [9]:
methods = methods_dict[methods_key]

### Get version of landcover array with just 'urban' and 'rural' categories

In [10]:
# # Read in the data
# landcover, out_meta = prepare_rainfall_scenario_raster(model_directory + "../LandCoverData/{}/LandCover_clipped.tif".format(region), bbox, True)
# # Convert the 1 and 6 values to 10 (for urban) and the rest to 11 (for non-urban).  
# landcover_mod =  np.where(landcover==1, 10, landcover)
# landcover_mod =  np.where(landcover_mod==6, 10, landcover_mod)
# # Convert the rest of the classes to 11
# for i in [1,2,3,4,5,7,8,9]:
#     landcover_mod =  np.where(landcover_mod==i, 11, landcover_mod)

In [11]:
# Urban landcover classification - 10 is urban, 1 is everything else
landcover_urban, out_meta = open_and_clip(landcover_directory + 'LandCover_urban_and_suburban_classification.tif', bbox)
landcover_urban_flat = landcover_urban.flatten()

In [13]:
# Water landcover classification - 10 is water, 11 is eveyrthing else
landcover_water, out_meta = open_and_clip(landcover_directory + 'LandCover_Freshwater_classification.tif', bbox)
landcover_water_flat = landcover_water.flatten()

# Water landcover classification - 10 is water, 11 is eveyrthing else
landcover_notwater, out_meta = open_and_clip(landcover_directory + 'LandCover_notwater_classification.tif', bbox)
landcover_notwater_flat = landcover_notwater.flatten()

# # Urban landcover classification - 10 is urban, 1 is everything else
# landcover_urban, out_meta = open_and_clip(landcover_directory + 'LandCover_Urban_classification.tif', bbox)
# landcover_urban_flat = landcover_urban.flatten()

# # Urban landcover classification - 10 is urban, 1 is everything else
# landcover_suburban, out_meta = open_and_clip(landcover_directory + 'LandCover_SubUrban_classification.tif', bbox)
# landcover_suburban_flat = landcover_suburban.flatten()

# # Water landcover classification - 10 is water, 11 is eveyrthing else
# landcover_arable, out_meta = open_and_clip(landcover_directory + 'LandCover_Arable_classification.tif', bbox)
# landcover_arable_flat = landcover_arable.flatten()

# # Water landcover classification - 10 is water, 11 is eveyrthing else
# landcover_cg, out_meta = open_and_clip(landcover_directory + 'LandCover_CalcareousGrassland_classification.tif', bbox)
# landcover_cg_flat = landcover_cg.flatten()

# # Urban landcover classification - 10 is urban, 1 is everything else
# landcover_ig, out_meta = open_and_clip(landcover_directory + 'LandCover_ImprovedGrassland_classification.tif', bbox)
# landcover_ig_flat = landcover_ig.flatten()

# # Urban landcover classification - 10 is urban, 1 is everything else
# landcover_ng, out_meta = open_and_clip(landcover_directory + 'LandCover_NeutralGrassland_classification.tif', bbox)
# landcover_ng_flat = landcover_ng.flatten()

# # Urban landcover classification - 10 is urban, 1 is everything else
# landcover_hg, out_meta = open_and_clip(landcover_directory + 'LandCover_HeatherGrassland_classification.tif', bbox)
# landcover_hg_flat = landcover_hg.flatten()

# # Urban landcover classification - 10 is urban, 1 is everything else
# landcover_dw, out_meta = open_and_clip(landcover_directory + 'LandCover_DeciduousWoodland_classification.tif', bbox)
# landcover_dw_flat = landcover_dw.flatten()

### Find maximum intensity for each method and minute in which it occurs (to use in sorting results analysis)

In [15]:
maxs = []
min_of_maxs = []

for method in methods:
    if method == '6h_feh_singlepeak':
        precip=pd.read_csv("../CreateSyntheticRainfallEvents/FEHProfiles/{}/6hr_100yrRP/PostLossRemoval/6hr_100yrRP_6.01h_1mintimestep.csv".format(catchment_name))
    else:
        precip=pd.read_csv("../CreateSyntheticRainfallEvents/{}Profiles/{}/6hr_100yrRP/PostLossRemoval/{}_urban.csv".format(methods_key,catchment_name, method))
    # Trim and add minutes column
    precip = precip[0:360].copy()
    precip['minute']=range(1,361)
    # Add max and minutes of max
    maxs.append(precip["Total net rain mm (Observed rainfall - 01/08/2022) - urbanised model"].max())
    min_of_maxs.append(precip["Total net rain mm (Observed rainfall - 01/08/2022) - urbanised model"].idxmax())

### Create versions of lists of methods, in order based on max intensity and the the timing of the max intensity 

In [16]:
short_ids_by_loading=  pd.DataFrame({"min": min_of_maxs, 'method_name': methods}).sort_values('min')["method_name"].tolist()
short_ids_by_intensity = pd.DataFrame({"min": maxs, 'method_name': methods}).sort_values('min', ascending = False)["method_name"].tolist()

### Create dataframe of colours for each cluster (based on their loading)

In [17]:
if methods_key == 'Observed':
    colours_df = create_colours_df_observed(short_ids_by_loading, methods)
elif methods_key == 'Idealised':
    colours_df = create_colours_df_idealised( short_ids_by_loading, methods)
elif methods_key == 'SinglePeak_Scaled':
    colours_df = create_colours_df_sp( short_ids_by_loading, methods)

### Create list of filepaths, formatted to be used for either depth or velocity

In [18]:
fps = []
for method_num, short_id in enumerate(methods):
    fp = model_directory + "{}/{} (Max).{}.tif".format(short_id, '{}', catchment_name_str)
    fps.append(fp)
if methods_key == 'Observed':
    fps[0] = '../../../FloodModelling/{}Models/Model_FEHProfiles/6h_feh_singlepeak/{}/{} (Max).{}.tif'.format(catchment_name, region, '{}', catchment_name_str)

# <u> Flood extent </u>
To examine whether the rainfall's temporal distribution influences the total extent of flooding, the number of flooded cells and the total flooded area in km2 (incl. only cells with depth >0.1m) is compared between the profile with a single peak, and the three methods for producing multi-peaked rainfall events. b

### Create dataframes containing the (total/urban) flooded area in each depth/velocity bin

In [19]:
# Define breaks to split the depths/velocities on
breaks_depths = np.array([0, 0.3, 0.6, 1.2, 100])  
labels_depth = ['<=0.3m', '0.3-0.6m', '0.6-1.2m', '>1.2m']
breaks_velocity = np.array([0,0.25,0.5,2,100])
labels_velocity = ["<=0.25m/s", "0.25-0.5m/s", "0.5-2m/s", ">2m/s"]

In [20]:
velocity_counts, velocity_props = create_binned_counts_and_props(methods, fps, '', 'Velocity',bbox,catchment_gdf, )
depth_counts, depth_props  = create_binned_counts_and_props(methods, fps, '', 'Depth',bbox,catchment_gdf, )

In [21]:
velocity_counts_urban, velocity_props_urban = create_binned_counts_and_props(methods, fps, True,'Velocity',bbox, 
                                                                             catchment_gdf,landcover_urban_flat)
depth_counts_urban, depth_props_urban= create_binned_counts_and_props(methods, fps, True,'Depth',bbox, 
                                                                       catchment_gdf, landcover_urban_flat)

ValueError: All arrays must be of the same length

In [None]:
velocity_counts_suburban, velocity_props_suburban = create_binned_counts_and_props(methods, fps, True,'Velocity',bbox, 
                                                                             catchment_gdf,landcover_suburban_flat)
depth_counts_suburban, depth_props_suburban= create_binned_counts_and_props(methods, fps, True,'Depth',bbox, 
                                                                       catchment_gdf, landcover_suburban_flat)

In [None]:
velocity_counts_notwater, velocity_props_notwater = create_binned_counts_and_props(methods, fps, True,'Velocity', bbox, 
                                                                                   catchment_gdf,landcover_notwater_flat)
depth_counts_notwater, depth_props_notwater = create_binned_counts_and_props(methods, fps, True,'Depth',bbox,catchment_gdf, 
                                                                             landcover_notwater_flat)

In [None]:
velocity_counts_ng, velocity_props_ng = create_binned_counts_and_props(methods, fps, True,'Velocity', bbox, 
                                                                                   catchment_gdf,landcover_ng_flat)
depth_counts_ng, depth_props_ng = create_binned_counts_and_props(methods, fps, True,'Depth',bbox,catchment_gdf, 
                                                                             landcover_ng_flat)

In [None]:
velocity_counts_ig, velocity_props_ig = create_binned_counts_and_props(methods, fps, True,'Velocity', bbox, 
                                                                                   catchment_gdf,landcover_ig_flat)
depth_counts_ig, depth_props_ig = create_binned_counts_and_props(methods, fps, True,'Depth',bbox,catchment_gdf, 
                                                                             landcover_ig_flat)

In [None]:
velocity_counts_cg, velocity_props_cg = create_binned_counts_and_props(methods, fps, True,'Velocity', bbox, 
                                                                                   catchment_gdf,landcover_cg_flat)
depth_counts_cg, depth_props_cg = create_binned_counts_and_props(methods, fps, True,'Depth',bbox,catchment_gdf, 
                                                                             landcover_cg_flat)

In [None]:
velocity_counts_hg, velocity_props_hg = create_binned_counts_and_props(methods, fps, True,'Velocity', bbox, 
                                                                                   catchment_gdf,landcover_hg_flat)
depth_counts_hg, depth_props_hg = create_binned_counts_and_props(methods, fps, True,'Depth',bbox,catchment_gdf, 
                                                                             landcover_hg_flat)

In [None]:
velocity_counts_dw, velocity_props_dw = create_binned_counts_and_props(methods, fps, True,'Velocity', bbox, 
                                                                                   catchment_gdf,landcover_dw_flat)
depth_counts_dw, depth_props_dw = create_binned_counts_and_props(methods, fps, True,'Depth',bbox,catchment_gdf, 
                                                                             landcover_dw_flat)

In [None]:
velocity_counts_water, velocity_props_water = create_binned_counts_and_props(methods, fps, True,'Velocity', bbox, 
                                                                                   catchment_gdf,landcover_water_flat)
depth_counts_water, depth_props_water = create_binned_counts_and_props(methods, fps, True,'Depth',bbox,catchment_gdf, 
                                                                             landcover_water_flat)

In [None]:
velocity_counts_arable, velocity_props_arable = create_binned_counts_and_props(methods, fps, True,'Velocity', bbox, 
                                                                                   catchment_gdf,landcover_arable_flat)
depth_counts_arable, depth_props_arable = create_binned_counts_and_props(methods, fps, True,'Depth',bbox,catchment_gdf, 
                                                                             landcover_arable_flat)

### Create dataframes containing the (total/urban) flooded area

In [None]:
totals_df = create_totals_df(velocity_counts, cell_size_in_m2)
totals_df

In [None]:
totals_df_urban = create_totals_df(velocity_counts_urban, cell_size_in_m2)  
totals_df_suburban = create_totals_df(velocity_counts_suburban, cell_size_in_m2)  
totals_df_notwater = create_totals_df(velocity_counts_notwater, cell_size_in_m2)  
totals_df_water = create_totals_df(velocity_counts_water, cell_size_in_m2)  
totals_df_arable= create_totals_df(velocity_counts_arable, cell_size_in_m2)  
totals_df_dw = create_totals_df(velocity_counts_dw, cell_size_in_m2)  
totals_df_hg = create_totals_df(velocity_counts_hg, cell_size_in_m2)  
totals_df_ig = create_totals_df(velocity_counts_ig, cell_size_in_m2)  
totals_df_cg = create_totals_df(velocity_counts_cg, cell_size_in_m2)  
totals_df_ng = create_totals_df(velocity_counts_ng, cell_size_in_m2)  

totals_dfs = [totals_df_urban, totals_df_suburban, totals_df_water, totals_df_arable, totals_df_dw, totals_df_hg,
             totals_df_hg, totals_df_ig, totals_df_cg, totals_df_ng]

In [None]:
for totals_df in totals_dfs:
    print(totals_df['FloodedArea'][0])
    
# totals_df_water['FloodedArea'][0] +totals_df_urban['FloodedArea'][0] +totals_df_suburban['FloodedArea'][0] + totals_df_arable['FloodedArea'][0]+ totals_df_dw['FloodedArea'][0]+totals_df_ig['FloodedArea'][0]+totals_df_cg['FloodedArea'][0]+totals_df_hg['FloodedArea'][0]+totals_df_ng['FloodedArea'][0]

In [None]:
totals_df_urban['FloodedArea'][0]

### Create dataframes containing the % diff in the flooded area between single peak and each other method  

In [None]:
if methods_key == 'Observed':
    column_for_comparison = '6h_feh_singlepeak'
elif methods_key == 'Idealised':
    column_for_comparison ='6h_sp_c_0.5'    
elif methods_key == 'SinglePeak_Scaled':
    column_for_comparison ='6h_sp_+0%'        
    
percent_diffs_df = find_percentage_diff (methods, column_for_comparison, totals_df, fps) 
percent_diffs_df_urban = find_percentage_diff (methods, column_for_comparison, totals_df_urban, fps)
percent_diffs_df_notwater = find_percentage_diff (methods, column_for_comparison, totals_df_notwater, fps)
percent_diffs_df_water = find_percentage_diff (methods, column_for_comparison, totals_df_water, fps)

In [None]:
def get_change(current, previous):
    if current == previous:
        return 100.0
    try:
        return (abs(current - previous) / previous) * 100.0
    except ZeroDivisionError:
        return 0

In [None]:
def find_percentage_diff (methods, reference_method_name, totals_df, fps):
    percent_diffs_formatted_for_plot = []
    percent_diffs_abs = []
    percent_diffs = []

    sp_value = totals_df.loc[totals_df['short_id'] == reference_method_name]['FloodedArea'].values[0]

    for num, fp in enumerate(fps):
        rainfall_scenario_name = methods[num]
        if rainfall_scenario_name!= reference_method_name:
            # FInd value for this scenario
            this_scenario_value = totals_df.loc[totals_df['short_id'] == rainfall_scenario_name]['FloodedArea'].values[0]
            # FInd % difference between single peak and this scenario
            #percent_diffs.append(round((this_scenario_value/sp_value-1)*100,1)[0])
            percent_diffs.append(get_change(this_scenario_value, sp_value))
            #percent_diffs.append(round((this_scenario_value/sp_value-1)*100,1))
            percent_diffs_abs.append(round(abs((this_scenario_value/sp_value-1))*100,1))
            percent_diffs_formatted_for_plot.append(round((this_scenario_value/sp_value-1)*100,1))
    # Convert values to strings, and add a + sign for positive values
    # Include an empty entry for the single peak scenario
    percent_diffs_df = pd.DataFrame({'percent_diff_formatted':[''] +['+' + str(round((list_item),2)) + '%' if list_item > 0 else str(round((list_item),2)) +
     '%'  for list_item in percent_diffs_formatted_for_plot] ,
             'percent_diffs':[0] + percent_diffs, 'percent_diffs_abs':[0] + percent_diffs_abs })
    return percent_diffs_df

## Find number of cells in which each method leads to the worst flooding (depth/velocity)

In [None]:
# # Find the number of flooded cells with the worst flooding for each method
# worst_case_method_depth = find_worst_case_method(fps, methods, 'Depth')
# worst_case_method_velocity = find_worst_case_method(fps, methods,  'Velocity') 

In [None]:
# # Remove multiple matches and nan
# worst_case_method_depth = worst_case_method_depth[~worst_case_method_depth['values'].isin(['multiple matches','nan'])]
# worst_case_method_velocity = worst_case_method_velocity[~worst_case_method_velocity['values'].isin(['multiple matches','nan'])]

# # # Reorder (and also add in the methods that are missing)
# worst_case_method_depth = pd.merge(worst_case_method_depth,  pd.DataFrame({'values': methods}), how="outer")
# worst_case_method_depth = worst_case_method_depth.reindex(worst_case_method_depth['values'].map(dict(zip(methods, range(len(methods))))).sort_values().index)
# worst_case_method_depth.reset_index(inplace=True,drop=True)

# worst_case_method_velocity = pd.merge(worst_case_method_velocity,  pd.DataFrame({'values': methods}), how="outer")
# worst_case_method_velocity = worst_case_method_velocity.reindex(worst_case_method_velocity['values'].map(dict(zip(methods, range(len(methods))))).sort_values().index)
# worst_case_method_velocity.reset_index(inplace=True,drop=True)

## Find number of cells with each hazard rating

In [None]:
hazard_counts, hazard_props = create_binned_counts_and_props_hazard(methods, fps, '', catchment_name_str,catchment_gdf, bbox)
hazard_counts_urban, hazard_props_urban = create_binned_counts_and_props_hazard(methods, fps, 'Urban', catchment_name_str,catchment_gdf, bbox, landcover_urban)
hazard_counts_notwater, hazard_props_notwater = create_binned_counts_and_props_hazard(methods, fps, 'Notwater', catchment_name_str, catchment_gdf,bbox, landcover_notwater)
hazard_counts_water, hazard_props_water = create_binned_counts_and_props_hazard(methods, fps, 'Water', catchment_name_str, catchment_gdf,bbox, landcover_water)

## Find number of cells which have moved between hazard categories

In [None]:
hazard_cat_changes = create_binned_counts_and_props_hazard_cat_change(methods, fps, catchment_name_str, bbox)
hazard_cat_changes = create_binned_counts_and_props_hazard_cat_change(methods, fps, catchment_name_str, bbox)
hazard_cat_changes = create_binned_counts_and_props_hazard_cat_change(methods, fps, catchment_name_str, bbox)

### Create a dataframe containing all the info on each of the scenarios

In [None]:
cluster_results = pd.DataFrame({'Cluster_num': methods, "MaxRainfallIntensity": maxs,  
    "MaxRainfallIntensityMinute": min_of_maxs,
    # All cells
   'FloodedArea':totals_df['FloodedArea'],
    '%Diff_FloodedArea_fromSP':percent_diffs_df['percent_diffs'],
    '%Diff_FloodedArea_fromSP_formatted':percent_diffs_df['percent_diff_formatted'],
    'Abs%Diff_FloodedArea_fromSP':percent_diffs_df['percent_diffs_abs'],
    # Urban cells
 'UrbanFloodedArea':totals_df_urban['FloodedArea'],
 '%Diff_UrbanFloodedArea_fromSP':percent_diffs_df_urban['percent_diffs'] ,
  '%Diff_UrbanFloodedArea_fromSP_formatted':percent_diffs_df_urban['percent_diff_formatted'],
   'Abs%Diff_UrbanFloodedArea_fromSP':percent_diffs_df_urban['percent_diffs_abs'], 
    # Not water cells
 'NotwaterFloodedArea':totals_df_notwater['FloodedArea'],
 '%Diff_NotwaterFloodedArea_fromSP':percent_diffs_df_notwater['percent_diffs'] ,
  '%Diff_NotwaterFloodedArea_fromSP_formatted':percent_diffs_df_notwater['percent_diff_formatted'],
   'Abs%Diff_NotwaterFloodedArea_fromSP':percent_diffs_df_notwater['percent_diffs_abs'],    
    # Water cells
 'WaterFloodedArea':totals_df_water['FloodedArea'],
 '%Diff_WaterFloodedArea_fromSP':percent_diffs_df_water['percent_diffs'] ,
  '%Diff_WaterFloodedArea_fromSP_formatted':percent_diffs_df_water['percent_diff_formatted'],
   'Abs%Diff_WaterFloodedArea_fromSP':percent_diffs_df_water['percent_diffs_abs'],                                        
   #'WorstCaseDepth_ncells': worst_case_method_depth['counts'].tolist(),
   # 'WorstCaseVelocity_ncells': worst_case_method_velocity['counts'].tolist(), 
                                'colour':colours_df['colour']}) 

### Add the depth/velocity category breakdowns and hazard categories to this

In [None]:
dfs = [velocity_props, depth_props,  velocity_props_urban, depth_props_urban,  velocity_props_notwater, depth_props_notwater, velocity_props_water, depth_props_water, 
       velocity_counts, depth_counts, velocity_counts_urban, depth_counts_urban, velocity_counts_notwater, depth_counts_notwater, velocity_counts_water, depth_counts_water,
       hazard_counts, hazard_props, hazard_counts_urban, hazard_props_urban, hazard_counts_notwater, hazard_props_notwater, hazard_counts_water, hazard_props_water]
suffixes = ['_propcells', '_propcells', '_propcells_urban','_propcells_urban','_propcells_notwater','_propcells_notwater','_propcells_water','_propcells_water',
            '_countcells','_countcells','_countcells_urban', '_countcells_urban','_countcells_notwater', '_countcells_notwater', '_countcells_notwater', '_countcells_notwater',
            '_countcells', '_propcells',  '_countcells_urban', '_propcells_urban', '_countcells_notwater', '_propcells_notwater', '_countcells_water', '_propcells_water']

for num, df in enumerate(dfs):
    # Reformat the dataframe
    df = df.set_index('index').T
    # Add the correct suffix to the column names
    df = df.add_suffix(suffixes[num]) 
    # Add Cluster_num column for joining
    df['Cluster_num'] = df.index#
    # Join to cluster results dataframe
    cluster_results = pd.merge(cluster_results,  df, how="outer", on = 'Cluster_num')
    
# cluster_results = pd.merge(cluster_results, hazard_cat_changes,  how="outer", on = 'Cluster_num')    

### Finding proportion of area/urban area flooded

In [None]:
# cluster_results['%floodedarea_urban'] = round(cluster_results['UrbanFloodedArea']/cluster_results['FloodedArea']*100,2)
# cluster_results['%_of_area_flooded'] =(cluster_results['FloodedArea']/29.589)*100
# cluster_results['%_of_urban_area_flooded'] =(cluster_results['UrbanFloodedArea']/7.987)*100
# # Add NAs for SP
# cluster_results['%Diff_FloodedArea_fromSP_formatted']=cluster_results['%Diff_FloodedArea_fromSP_formatted'].fillna('')
# cluster_results['%Diff_UrbanFloodedArea_fromSP_formatted']=cluster_results['%Diff_UrbanFloodedArea_fromSP_formatted'].fillna('')

## Summarise the number of cells in different depth/velocity categories

#### Get one dataframe containing the values for all methods, one row per cell per method 
Also including the water class variable in that cell

In [None]:
# each_cells_value = produce_df_of_cell_by_cell_values(model_directory, catchment_name_str, bbox, methods, landcover_water_flat, landcover_urban_flat)
# # rename for consistency
# each_cells_value['short_id'] = each_cells_value['short_id'].map({'6h_feh_singlepeak': 'FEH'}).fillna(each_cells_value['short_id'] )

### Rename the profile names

In [None]:
if methods_key == 'Idealised':
    cluster_results['Cluster_num']=['C', 'FL1', 'FL2', 'FL3', 'FL4','BL6', 'BL7', 'BL8','BL9']
    ### Reorder to C in middle
    cluster_results = cluster_results.reindex([1,2,3,4,0,5,6,7,8])
    cluster_results.reset_index(inplace=True, drop=True)
if methods_key == 'Observed':
    methods = ['6h_feh_singlepeak','6h_c1','6h_c8','6h_c15','6h_c3','6h_c11','6h_c10','6h_c9','6h_c13','6h_c6',
                 '6h_c2','6h_c12','6h_c14','6h_c4','6h_c7','6h_c5']
    cluster_results = cluster_results.reindex(cluster_results['Cluster_num'].map(dict(zip(methods, range(len(methods))))).sort_values().index)
    cluster_results.reset_index(inplace=True, drop=True)
    cluster_results['Cluster_num'] = cluster_results['Cluster_num'].map({'6h_feh_singlepeak': 'FEH'}).fillna(cluster_results['Cluster_num'] )

### Save to file

In [None]:
# Create path to the folder
path = "Outputs/Data/{}Profiles/{}/".format(methods_key, catchment_name)
# Check whether the specified path exists or not
isExist = os.path.exists(path)
# Create a new directory because it does not exist
if not isExist:
    os.makedirs(path)
# Save
cluster_results.to_csv(path + "{}allclusters_summary.csv".format(region), index=False)
# cluster_results.to_csv(path + "{}allclusters_summary_arable_dw_ig.csv".format(region), index=False)
# cluster_results.to_csv(path + "{}allclusters_summary_ng_cg_hg.csv".format(region), index=False)

# Save
# each_cells_value.to_csv(path + "{}individual_cell_values.csv".format(region), index=False)

### Delete tiff files (as these aren't used again and take up a lot of space)

In [None]:
# for method in short_ids:
#     print(method)
#     if method != '6h_feh_sp':
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/hazard_cat_difference.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Depth_difffromsinglepeak_classified.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Depth_difffromsinglepeak_posneg.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Velocity_difffromsinglepeak_classified.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Velocity_difffromsinglepeak_posneg.tif".format(method)) 
        
#     os.remove("../../../../FloodModelling/MeganModel_New/{}/Depth_classified.tif".format(method)) 
#     os.remove("../../../../FloodModelling/MeganModel_New/{}/hazard_classified.tif".format(method)) 
#     os.remove("../../../../FloodModelling/MeganModel_New/{}/Velocity_classified.tif".format(method)) 

### Cross checking results with QGIS (raster layer unique values report)

In [None]:
# fp = fps[3]
# raster = prepare_rainfall_scenario_raster(fp.format('Depth'), bbox, remove_little_values)[0]
# unique, counts = np.unique(raster, return_counts=True)
# df = pd.DataFrame({'values': unique, 'counts':counts})
# df