In [1]:
import os
import sys

import numpy as np
import pandas as pd

sys.path.append("../")
from my_functions import *

# --- Run configuration -------------------------------------------------------
# Catchment to analyse; must be a key of `catchment_settings` below.
catchment_name = 'LinDyke'
# Profile set to analyse; used further down as a key into `methods_dict`.
methods_key = 'Observed'
# Sub-region string inserted into some file paths; '' is the value used here
# ('Kippax' and 'Garforth' were noted as alternatives).
region = ''

# Per-catchment settings:
#   (terrain string used in the model output file names,
#    bounding-box corners as (minx, miny, maxx, maxy),
#    cell size in m2)
catchment_settings = {
    'LinDyke': ("Resampled.Terrain", (437000, 426500, 445500, 434300), 1),
    'WykeBeck': ("Terrain.wykeDEM", (430004, 429978, 438660, 440996), 4),
}

# Fail early with a clear message: previously an unrecognised name fell
# through the if/elif chain and surfaced later as a NameError on `minx`.
if catchment_name not in catchment_settings:
    raise ValueError(
        "Unknown catchment_name {!r}; expected one of {}".format(
            catchment_name, sorted(catchment_settings)))

catchment_name_str, (minx, miny, maxx, maxy), cell_size_in_m2 = catchment_settings[catchment_name]

# Create a bounding box (this is used in preparing the rasters)
bbox = box(minx, miny, maxx, maxy)

In [2]:
# Whether values <0.1 should be filtered out (handed to the binned-count helpers below)
remove_little_values = True

# Path to the directory holding the model runs for this catchment and profile set
model_directory = '../../../FloodModelling/{}Models/Model_{}Profiles/'.format(
    catchment_name,
    methods_key,
)

### Define the names of the methods (in a dictionary keyed by model run type)

In [3]:
# Short ids of the model runs belonging to each profile set.
# The keys are the values that `methods_key` may take.
methods_dict = {
    # '6h_sp_c_0.5', then the 'fl' runs 0.1-0.4 and the 'bl' runs 0.6-0.9
    'Idealised': (
        ['6h_sp_c_0.5']
        + ['6h_sp_fl_0.{}'.format(tenth) for tenth in range(1, 5)]
        + ['6h_sp_bl_0.{}'.format(tenth) for tenth in range(6, 10)]
    ),
    # FEH single peak first, followed by clusters c1..c15
    'Observed': (
        ['6h_feh_singlepeak']
        + ['6h_c{}'.format(cluster) for cluster in range(1, 16)]
    ),
    # Single peak scaled by +0%, +5%, ..., +20%
    'SinglePeak_Scaled': ['6h_sp_+{}%'.format(uplift) for uplift in range(0, 25, 5)],
}

In [4]:
# Select the list of run ids for the profile set named by `methods_key`
# (raises KeyError if `methods_key` is not a key of `methods_dict`)
methods = methods_dict[methods_key]

### Get version of landcover array with just 'urban' and 'rural' categories

In [5]:
# Load the clipped land-cover raster (and its metadata) for the chosen region
landcover_fp = model_directory + "../LandCoverData/{}/LandCover_clipped.tif".format(region)
landcover, out_meta = prepare_rainfall_scenario_raster(landcover_fp, bbox, True)

# Collapse the land-cover classes into two codes:
#   10 <- classes 1 and 6               (urban)
#   11 <- classes 2, 3, 4, 5, 7, 8, 9   (non-urban)
# Any value outside 1-9 is left as it is.
is_urban = np.isin(landcover, [1, 6])
is_non_urban = np.isin(landcover, [2, 3, 4, 5, 7, 8, 9])
landcover_mod = np.where(is_urban, 10, np.where(is_non_urban, 11, landcover))

### Find maximum intensity for each method and minute in which it occurs (to use in sorting results analysis)

In [6]:
# Column of the post-loss-removal CSV files that holds the rainfall values
rain_column = "Total net rain mm (Observed rainfall - 01/08/2022) - urbanised model"

maxs = []         # peak value of `rain_column`, one entry per method
min_of_maxs = []  # minute (1-360) in which that peak occurs, one entry per method

for method in methods:
    precip = pd.read_csv("../CreateSyntheticRainfallEvents/{}Profiles/{}/6hr_100yrRP/PostLossRemoval/{}_urban.csv".format(methods_key, catchment_name, method))
    # Keep the first 360 rows and number them as minutes 1..360
    precip = precip[0:360].copy()
    precip['minute'] = range(1, 361)
    # Peak value
    maxs.append(precip[rain_column].max())
    # Minute of the peak. idxmax() returns the (0-based) row label, so it is
    # translated through the 'minute' column; using the label directly
    # reported a value one less than the minute defined above.
    peak_row = precip[rain_column].idxmax()
    min_of_maxs.append(int(precip.loc[peak_row, 'minute']))

### Create versions of the list of methods, ordered by max intensity and by the timing of the max intensity

In [7]:
# Methods ordered by when their peak occurs (earliest first)
peak_timing = pd.DataFrame({"min": min_of_maxs, 'method_name': methods})
short_ids_by_loading = peak_timing.sort_values('min')["method_name"].tolist()

# Methods ordered by the size of their peak (largest first)
peak_size = pd.DataFrame({"min": maxs, 'method_name': methods})
short_ids_by_intensity = peak_size.sort_values('min', ascending = False)["method_name"].tolist()

### Create dataframe of colours for each cluster (based on their loading)

In [8]:
# Pick the colour-table builder that matches the chosen profile set. The
# builders are wrapped in lambdas so that only the selected one is looked up
# and called.
colour_builders = {
    'Observed': lambda: create_colours_df_observed(short_ids_by_loading, methods),
    'Idealised': lambda: create_colours_df_idealised(short_ids_by_loading, methods),
    'SinglePeak_Scaled': lambda: create_colours_df_sp(short_ids_by_loading, methods),
}
# An unrecognised key leaves `colours_df` unassigned
if methods_key in colour_builders:
    colours_df = colour_builders[methods_key]()

### Create list of filepaths, formatted to be used for either depth or velocity

In [9]:
# One path template per method. The remaining '{}' placeholder is filled in
# later with the variable name (e.g. 'Depth' or 'Velocity').
fps = [
    model_directory + "{}/{} (Max).{}.tif".format(short_id, '{}', catchment_name_str)
    for short_id in methods
]
# For the observed profiles, the first entry is replaced by a path into the
# FEH profiles model folder
if methods_key == 'Observed':
    fps[0] = '../../../FloodModelling/{}Models/Model_FEHProfiles/6h_feh_singlepeak/{}/{} (Max).{}.tif'.format(catchment_name, region, '{}', catchment_name_str)

### Define breaks for categorising velocity and depth

In [10]:
# NOTE: `colours_df` has already been built above by the create_colours_df_*
# function matching `methods_key`, so it is deliberately NOT rebuilt here.
# An unconditional call to create_colours_df_observed() at this point would
# overwrite the colours chosen for the 'Idealised' and 'SinglePeak_Scaled'
# profile sets with the 'Observed' ones.

In [11]:
# Bin edges and the label for each bin, used to categorise the rasters.
# Five edges define four bins, one per label.

# Depth
breaks_depths, labels_depth = (
    np.array([0, 0.3, 0.6, 1.2, 100]),
    ['<=0.3m', '0.3-0.6m', '0.6-1.2m', '>1.2m'],
)

# Velocity
breaks_velocity, labels_velocity = (
    np.array([0,0.25,0.5,2,100]),
    ["<=0.25m/s", "0.25-0.5m/s", "0.5-2m/s", ">2m/s"],
)

# <u> Flood extent </u>
To examine whether the rainfall's temporal distribution influences the total extent of flooding, the number of flooded cells and the total flooded area in km² (including only cells with depth >0.1m) are compared between the profile with a single peak and the three methods for producing multi-peaked rainfall events.

### Create dataframes containing the (total/urban) flooded area in each depth/velocity bin

In [12]:
# Per-bin results for every method: each call yields a (counts, props) pair

# Velocity
velocity_binned = create_binned_counts_and_props(
    methods, fps, 'Velocity', breaks_velocity, labels_velocity, bbox, remove_little_values)
velocity_counts, velocity_props = velocity_binned

# Depth
depth_binned = create_binned_counts_and_props(
    methods, fps, 'Depth', breaks_depths, labels_depth, bbox, remove_little_values)
depth_counts, depth_props = depth_binned

In [13]:
# Same per-bin results as above, but the urban variant of the helper is
# additionally given the reclassified land-cover array

# Velocity
velocity_binned_urban = create_binned_counts_and_props_urban(
    methods, fps, 'Velocity', breaks_velocity, labels_velocity, bbox, remove_little_values, landcover_mod)
velocity_counts_urban, velocity_props_urban = velocity_binned_urban

# Depth
depth_binned_urban = create_binned_counts_and_props_urban(
    methods, fps, 'Depth', breaks_depths, labels_depth, bbox, remove_little_values, landcover_mod)
depth_counts_urban, depth_props_urban = depth_binned_urban

### Create dataframes containing the (total/urban) flooded area

In [14]:
# Flooded area per method, derived from the depth cell counts
totals_df = create_totals_df(depth_counts, cell_size_in_m2)
# Urban flooded area per method. This is derived from the *depth* counts as
# well, so that the urban figure is measured on the same basis as the total
# (it was previously built from the velocity counts).
totals_df_urban = create_totals_df(depth_counts_urban, cell_size_in_m2)
totals_df

Unnamed: 0,short_id,FloodedArea
0,6h_feh_singlepeak,1.938815
1,6h_c1,1.716188
2,6h_c2,1.666437
3,6h_c3,1.688951
4,6h_c4,1.662872
5,6h_c5,1.843293
6,6h_c6,1.65421
7,6h_c7,1.722111
8,6h_c8,1.558924
9,6h_c9,1.652527


### Create dataframes containing the % diff in the flooded area between single peak and each other method  

In [15]:
# Baseline (single-peak) run that the other methods are compared against,
# for each profile set
baseline_by_profile_set = {
    'Observed': '6h_feh_singlepeak',
    'Idealised': '6h_sp_c_0.5',
    'SinglePeak_Scaled': '6h_sp_+0%',
}
# An unrecognised key leaves `column_for_comparison` unassigned
if methods_key in baseline_by_profile_set:
    column_for_comparison = baseline_by_profile_set[methods_key]

# Percentage differences relative to the baseline, for the total and the urban flooded area
percent_diffs_df = find_percentage_diff(methods, column_for_comparison, totals_df, fps)
percent_diffs_df_urban = find_percentage_diff(methods, column_for_comparison, totals_df_urban, fps)
percent_diffs_df

Unnamed: 0,percent_diff_formatted,percent_diffs,percent_diffs_abs
0,,0.0,0.0
1,-11.48%,-11.48,11.48
2,-14.05%,-14.05,14.05
3,-12.89%,-12.89,12.89
4,-14.23%,-14.23,14.23
5,-4.93%,-4.93,4.93
6,-14.68%,-14.68,14.68
7,-11.18%,-11.18,11.18
8,-19.59%,-19.59,19.59
9,-14.77%,-14.77,14.77


## Find number of cells in which each method leads to the worst flooding (depth/velocity)

In [16]:
# # Find the number of flooded cells with the worst flooding for each method
# worst_case_method_depth = find_worst_case_method(fps, methods, 'Depth')
# worst_case_method_velocity = find_worst_case_method(fps, methods,  'Velocity') 

In [17]:
# # Remove multiple matches and nan
# worst_case_method_depth = worst_case_method_depth[~worst_case_method_depth['values'].isin(['multiple matches','nan'])]
# worst_case_method_velocity = worst_case_method_velocity[~worst_case_method_velocity['values'].isin(['multiple matches','nan'])]

# # # Reorder (and also add in the methods that are missing)
# worst_case_method_depth = pd.merge(worst_case_method_depth,  pd.DataFrame({'values': methods}), how="outer")
# worst_case_method_depth = worst_case_method_depth.reindex(worst_case_method_depth['values'].map(dict(zip(methods, range(len(methods))))).sort_values().index)
# worst_case_method_depth.reset_index(inplace=True,drop=True)

# worst_case_method_velocity = pd.merge(worst_case_method_velocity,  pd.DataFrame({'values': methods}), how="outer")
# worst_case_method_velocity = worst_case_method_velocity.reindex(worst_case_method_velocity['values'].map(dict(zip(methods, range(len(methods))))).sort_values().index)
# worst_case_method_velocity.reset_index(inplace=True,drop=True)

## Find number of cells with each hazard rating

In [18]:
# Hazard results for every method, returned as a (counts, props) pair.
# NOTE(review): presumably per-hazard-rating cell counts and proportions, as
# the section heading suggests - confirm against my_functions.
hazard_counts, hazard_props = create_binned_counts_and_props_hazard(methods, fps, catchment_name_str, bbox)

## Find number of cells which have moved between hazard categories

In [19]:
# Table of hazard-category changes for every method; it is merged into
# `cluster_results` on 'Cluster_num' further down.
# NOTE(review): the reference that changes are measured against is not
# visible here - confirm against my_functions.
hazard_cat_changes = create_binned_counts_and_props_hazard_cat_change(methods, fps, catchment_name_str, bbox)

### Create a dataframe containing all the info on each of the scenarios

In [20]:
# Columns of the per-method summary table, in output order
summary_columns = {
    # Identification and rainfall peak
    'Cluster_num': methods,
    "MaxRainfallIntensity": maxs,
    "MaxRainfallIntensityMinute": min_of_maxs,
    # Total flooded area and its difference from the single-peak run
    'FloodedArea': totals_df['FloodedArea'],
    '%Diff_FloodedArea_fromSP': percent_diffs_df['percent_diffs'],
    '%Diff_FloodedArea_fromSP_formatted': percent_diffs_df['percent_diff_formatted'],
    'Abs%Diff_FloodedArea_fromSP': percent_diffs_df['percent_diffs_abs'],
    # Urban flooded area and its difference from the single-peak run
    'UrbanFloodedArea': totals_df_urban['FloodedArea'],
    '%Diff_UrbanFloodedArea_fromSP': percent_diffs_df_urban['percent_diffs'],
    '%Diff_UrbanFloodedArea_fromSP_formatted': percent_diffs_df_urban['percent_diff_formatted'],
    'Abs%Diff_UrbanFloodedArea_fromSP': percent_diffs_df_urban['percent_diffs_abs'],
    # Disabled along with the worst-case cells above:
    #'WorstCaseDepth_ncells': worst_case_method_depth['counts'].tolist(),
    #'WorstCaseVelocity_ncells': worst_case_method_velocity['counts'].tolist(),
    # Plotting colour for each method
    'colour': colours_df['colour'],
}
cluster_results = pd.DataFrame(summary_columns)

### Add the depth/velocity category breakdowns and hazard categories to this

In [21]:
# Each breakdown table paired with the suffix appended to its column names
tables_with_suffix = [
    (velocity_props, '_propcells'),
    (depth_props, '_propcells'),
    (velocity_props_urban, '_propcells_urban'),
    (depth_props_urban, '_propcells_urban'),
    (velocity_counts, '_countcells'),
    (depth_counts, '_countcells'),
    (velocity_counts_urban, '_countcells_urban'),
    (depth_counts_urban, '_countcells_urban'),
    (hazard_counts, '_numcells'),
    (hazard_props, '_propcells'),
]

# NOTE: this cell extends `cluster_results` in place, so re-create
# `cluster_results` (previous cell) before running it a second time.
for table, suffix in tables_with_suffix:
    # Transpose so the original columns become the rows, then suffix the new column names
    wide = table.set_index('index').T.add_suffix(suffix)
    # Expose the row labels as the join key
    wide['Cluster_num'] = wide.index
    # Attach to the summary table
    cluster_results = pd.merge(cluster_results, wide, how="outer", on = 'Cluster_num')

# Finally attach the hazard-category changes
cluster_results = pd.merge(cluster_results, hazard_cat_changes,  how="outer", on = 'Cluster_num')

### Save to file

In [22]:
# Folder that the summary file is written to
path = "Outputs/Data/{}Profiles/{}/".format(methods_key, catchment_name)
# Create the folder, and any missing parents, if it is not there yet.
# exist_ok=True replaces the separate exists() check, so the cell is safe to
# re-run and cannot fail if the folder appears between the check and the create.
os.makedirs(path, exist_ok=True)
# Save
cluster_results.to_csv(path + "{}allclusters_summary.csv".format(region), index=False)

### Delete tiff files (as these aren't used again and take up a lot of space)

In [24]:
# for method in short_ids:
#     print(method)
#     if method != '6h_feh_sp':
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/hazard_cat_difference.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Depth_difffromsinglepeak_classified.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Depth_difffromsinglepeak_posneg.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Velocity_difffromsinglepeak_classified.tif".format(method)) 
#         os.remove("../../../../FloodModelling/MeganModel_New/{}/Velocity_difffromsinglepeak_posneg.tif".format(method)) 
        
#     os.remove("../../../../FloodModelling/MeganModel_New/{}/Depth_classified.tif".format(method)) 
#     os.remove("../../../../FloodModelling/MeganModel_New/{}/hazard_classified.tif".format(method)) 
#     os.remove("../../../../FloodModelling/MeganModel_New/{}/Velocity_classified.tif".format(method)) 

### Cross checking results with QGIS (raster layer unique values report)

In [25]:
# fp = fps[3]
# raster = prepare_rainfall_scenario_raster(fp.format('Depth'), bbox, remove_little_values)[0]
# unique, counts = np.unique(raster, return_counts=True)
# df = pd.DataFrame({'values': unique, 'counts':counts})
# df