## Comparison to presence/absence map of TP/FP/TN/FN

In [None]:
# Rasterize the resulting shapefile matching with hillshade 15m
import geopandas as gpd
import rasterio
from rasterio.features import rasterize
import numpy as np
import os

# Folder with the probability shapefiles, and the raster whose grid is reused
base_path = "Z:/GEOAPP_Synology/Lavori/Twente/Python_twente/ashokdahal-TransformerLandslide-23fdcf6/Results_GIS/"
reference_raster_path = "Z:/GEOAPP_Synology/Lavori/Twente/Python_twente/ashokdahal-TransformerLandslide-23fdcf6/Hillshade_15m_clip.tif"

# Read the grid definition (CRS, affine transform, size) of the reference raster
with rasterio.open(reference_raster_path) as ref_raster:
    ref_crs = ref_raster.crs
    ref_transform = ref_raster.transform
    ref_height, ref_width = ref_raster.height, ref_raster.width

# Shapefiles to rasterize; uncomment an entry to process that configuration too
shapefiles = [
    base_path + "allProbs_hourly.shp",
    # base_path + "allProbs_daily_75.shp",
    # base_path + "allProbs_terrain_allCum.shp",
    # base_path + "allProbs_terrain_3wCum.shp",
    # base_path + "allProbs_terrain_2wCum.shp",
    # base_path + "allProbs_terrain_1wCum.shp",
    # base_path + "allProbs_terrain_MonCum.shp",
    # base_path + "allProbs_terrain.shp",
]

# Folder receiving the output rasters (created when it does not exist yet)
output_dir = base_path + "rasters/"
os.makedirs(output_dir, exist_ok=True)

def shapefile_to_raster_with_snap(shapefile, field, output_raster):
    """Burn one attribute of a shapefile onto the reference raster grid.

    The output GeoTIFF reuses the notebook-level reference grid
    (``ref_transform``, ``ref_width``, ``ref_height``, ``ref_crs``), so its
    cells coincide with those of the reference raster.

    Args:
        shapefile: Path of the vector file to rasterize.
        field: Attribute whose value is written into the covered cells.
        output_raster: Path of the single-band GeoTIFF to create.
    """
    gdf = gpd.read_file(shapefile)

    # The geometries are burned with the reference transform, so they must be
    # expressed in the reference CRS; with a different CRS they would land in
    # the wrong cells (or outside the grid) without any error being raised.
    # Layers without CRS information are used as they are.
    if gdf.crs is not None and ref_crs is not None:
        gdf = gdf.to_crs(ref_crs)

    nodata_value = 2  # written to every cell not covered by a geometry

    # Rasterize the (geometry, value) pairs, snapping to the reference grid
    rasterized = rasterize(
        shapes=zip(gdf.geometry, gdf[field]),
        out_shape=(ref_height, ref_width),
        transform=ref_transform,
        fill=nodata_value,
        dtype=rasterio.float32
    )

    # Save the raster with the same grid as the reference raster
    with rasterio.open(
        output_raster, 'w',
        driver='GTiff',
        height=ref_height,
        width=ref_width,
        count=1,
        dtype=rasterized.dtype,
        crs=ref_crs,
        transform=ref_transform,
        nodata=nodata_value
    ) as dst:
        dst.write(rasterized, 1)

# Rasterize every landslide-type field of every shapefile on the reference grid.
# Output name: <shapefile name without extension>_<field>.tif
fields = ['DS', 'DF', 'ES', 'EF', 'RS']
for shapefile in shapefiles:
    layer_name = os.path.basename(shapefile).replace('.shp', '')
    for field in fields:
        output_raster = os.path.join(output_dir, f"{layer_name}_{field}.tif")
        shapefile_to_raster_with_snap(shapefile, field, output_raster)
        print(f"Raster created with snapping: {output_raster}")


In [None]:
# Produce the confusion matrix for all landslide types and configurations
import geopandas as gpd
import numpy as np
import os

# Folders holding the probability shapefiles and the ground-truth arrays
base_path_prob = "Z:/GEOAPP_Synology/Lavori/Twente/Python_twente/ashokdahal-TransformerLandslide-23fdcf6/Results_GIS/"
base_path_GT = "Z:/GEOAPP_Synology/Lavori/Twente/Python_twente/ashokdahal-TransformerLandslide-23fdcf6/Data/"

# Probability shapefiles (one per configuration); uncomment to process more
shapefiles_prob = [
    base_path_prob + "allProbs_hourly.shp",
    # base_path_prob + "allProbs_daily_75.shp",
    # base_path_prob + "allProbs_terrain_allCum.shp",
    # base_path_prob + "allProbs_terrain_3wCum.shp",
    # base_path_prob + "allProbs_terrain_2wCum.shp",
    # base_path_prob + "allProbs_terrain_1wCum.shp",
    # base_path_prob + "allProbs_terrain_MonCum.shp",
    # base_path_prob + "allProbs_terrain.shp",
]

# Ground-truth .npy files, one per landslide type
npy_GT = [
    f"{base_path_GT}su05_LS_{type_suffix}.npy"
    for type_suffix in ("DS", "DF", "ES", "EF", "RS1")
]

# Ground-truth arrays, in the same order as npy_GT
ground_truths = list(map(np.load, npy_GT))

# Function to add confusion matrix field to a GeoDataFrame
def add_confusion_matrix_fields(prob_gdf, gt_array, field, threshold=0.5):
    """Label every row of ``prob_gdf`` as TP, TN, FP or FN for one field.

    A row is predicted positive when ``prob_gdf[field] >= threshold``. The
    ground truth is matched to the rows by position. The labels are written
    in place to the column ``f"{field}_CF"``; nothing is returned.

    Rows whose ground-truth value is neither 0 nor 1 cannot be classified and
    are labelled "NA" instead of silently counting as true positives.

    Args:
        prob_gdf: Table (GeoDataFrame or DataFrame) holding the probabilities.
        gt_array: Ground-truth values (0 = absence, 1 = presence), one per row.
        field: Name of the probability column to classify.
        threshold: Probability from which a row is predicted positive.

    Raises:
        ValueError: If ``gt_array`` does not hold exactly one value per row.
    """
    truth = np.asarray(gt_array).ravel()
    if truth.size != len(prob_gdf):
        raise ValueError(
            f"Ground truth has {truth.size} values but the table has "
            f"{len(prob_gdf)} rows for field {field}."
        )

    # Work on plain arrays so the comparison is positional and vectorized
    predicted_positive = prob_gdf[field].to_numpy() >= threshold
    observed_positive = truth == 1
    observed_negative = truth == 0

    conditions = [
        predicted_positive & observed_positive,    # True Positive
        ~predicted_positive & observed_negative,   # True Negative
        predicted_positive & observed_negative,    # False Positive
        ~predicted_positive & observed_positive,   # False Negative
    ]
    labels = ["TP", "TN", "FP", "FN"]

    prob_gdf[f'{field}_CF'] = np.select(conditions, labels, default="NA")

# For every configuration, write a "<name>_CF.shp" copy of the probability
# shapefile with one TP/TN/FP/FN column per landslide type
for shapefile_prob in shapefiles_prob:
    prob_gdf = gpd.read_file(shapefile_prob)

    # Nothing to classify in an empty layer
    if prob_gdf.empty:
        print(f"Skipping empty shapefile: {shapefile_prob}")
        continue

    n_units = len(prob_gdf)
    for field, gt_array in zip(['DS', 'DF', 'ES', 'EF', 'RS'], ground_truths):
        # Ground truth and shapefile rows must correspond one to one
        if n_units != len(gt_array):
            raise ValueError(f"Mismatch between the number of entries in {shapefile_prob} and ground truth array for {field}.")

        # Classify this landslide type with a 0.5 probability threshold
        add_confusion_matrix_fields(prob_gdf, gt_array, field, threshold=0.5)

    # Store the classified layer next to the input one
    output_shapefile = shapefile_prob.replace(".shp", "_CF.shp")
    prob_gdf.to_file(output_shapefile)
    print(f"Confusion matrix shapefile created: {output_shapefile}")


## Comparison of presence/absence TP/FN and relative landslide area

In [None]:
import geopandas as gpd
import numpy as np
import os
import pandas as pd  # Import pandas for saving to Excel
import openpyxl

# Folders holding the confusion-matrix shapefiles and the landslide polygons
base_path_CF = "Z:/GEOAPP_Synology/Lavori/Twente/Python_twente/ashokdahal-TransformerLandslide-23fdcf6/Results_GIS/"
base_path_GT_area = "Z:/GEOAPP_Synology/Lavori/Twente/Python_twente/ashokdahal-TransformerLandslide-23fdcf6/Data/"

# Confusion-matrix ("_CF") shapefiles, one per configuration
shapefiles_CF = [
    f"{base_path_CF}allProbs_{configuration}_CF.shp"
    for configuration in (
        "hourly",
        "daily_75",
        "terrain_allCum",
        "terrain_3wCum",
        "terrain_2wCum",
        "terrain_1wCum",
        "terrain_MonCum",
        "terrain",
    )
]

# Ground-truth landslide polygons, one shapefile per landslide type
shapefiles_LS = [
    f"{base_path_GT_area}LS_{type_suffix}.shp"
    for type_suffix in ("DS", "DF", "ES", "EF", "RS1")
]

# One summary row per configuration is appended here
results = []

# Landslide type codes, in the order of shapefiles_LS and of the summary columns
ls_type_codes = ['DS', 'DF', 'ES', 'EF', 'RS']

# The ground-truth polygons and their total areas do not depend on the
# configuration, so they are loaded and measured once, outside the loop
ls_polygons = {
    ls_code: gpd.read_file(ls_path)
    for ls_code, ls_path in zip(ls_type_codes, shapefiles_LS)
}
ls_total_area = {
    ls_code: polygons.geometry.area.sum()
    for ls_code, polygons in ls_polygons.items()
}

# Build one summary row per CF shapefile (configuration)
for cf_shapefile in shapefiles_CF:

    # Load the current CF shapefile (Confusion Matrix results)
    cf_gdf = gpd.read_file(cf_shapefile)
    print(f"\nProcessing CF shapefile: {cf_shapefile}")

    summary_row = [os.path.basename(cf_shapefile)]

    for ls_code in ls_type_codes:
        cf_labels = cf_gdf[f'{ls_code}_CF']

        # Units classified as True Positive, and all observed-positive units
        tp_units = cf_gdf[cf_labels == 'TP']
        positive_units = cf_gdf[cf_labels.isin(['TP', 'FN'])]

        # Landslide area that falls inside the True Positive units
        intersection = gpd.overlay(ls_polygons[ls_code], tp_units, how='intersection')
        total_area_TP = intersection.geometry.area.sum()

        # Share of the total landslide area captured by the TP units
        total_area_ls = ls_total_area[ls_code]
        area_ratio = total_area_TP / total_area_ls if total_area_ls > 0 else 0

        # Share of the observed-positive units (TP + FN) that are TP
        count_TP = len(tp_units)
        count_positive = len(positive_units)
        count_ratio = count_TP / count_positive if count_positive > 0 else 0

        summary_row.extend([total_area_TP, area_ratio, count_TP, count_ratio])

    # Append the results to the list
    results.append(summary_row)

# Keep the summary rows as an object array and store it as .npy
results_array = np.array(results, dtype=object)
output_npy = os.path.join(base_path_CF, "TP_area_ratio_summary.npy")
np.save(output_npy, results_array)

# Column names: the configuration, then four values per landslide type
summary_columns = ['Configuration']
for ls_code in ('DS', 'DF', 'ES', 'EF', 'RS'):
    summary_columns += [
        f'Total_area_TP_{ls_code}',
        f'Area_Ratio_{ls_code}',
        f'Total_count_TP_{ls_code}',
        f'Count_Ratio_{ls_code}',
    ]

# Store the same table as an Excel sheet
output_excel = os.path.join(base_path_CF, "TP_area_ratio_summary.xlsx")
df = pd.DataFrame(results, columns=summary_columns)
df.to_excel(output_excel, index=False)

# Show the table: a header line, then one row per configuration
print("\nFinal Results (TP Polygon Areas, Ratios, Counts inside Landslide Shapefiles):")
print("CF Shapefile, Total_TP_DS, Ratio_DS, Count_TP_DS, Ratio_Count_DS, Total_TP_DF, Ratio_DF, Count_TP_DF, Ratio_Count_DF, Total_TP_ES, Ratio_ES, Count_TP_ES, Ratio_Count_ES, Total_TP_EF, Ratio_EF, Count_TP_EF, Ratio_Count_EF, Total_TP_RS, Ratio_RS, Count_TP_RS, Ratio_Count_RS")
for row in results_array:
    print(row)


## Comparison of the ls counts and binary probability results

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import pyogrio

In [None]:
# Folder containing both the landslide-count shapefiles (su05__LS_*.shp) and
# the probability shapefiles (allProbs_*.shp) used in this cell
base_path = "Z:/GEOAPP_Synology/Lavori/Twente/Python_twente/ashokdahal-TransformerLandslide-23fdcf6/Results_GIS/"

# Load your source shapefiles and normalize the 'Trig_count' field
def normalize_trig_count(shapefile_path, output_field):
    """Min-max scale the 'Trig_count' attribute of a shapefile.

    Args:
        shapefile_path: Path of the shapefile to read.
        output_field: Name of the column that receives the scaled values.

    Returns:
        A table restricted to two columns: 'value' and ``output_field``.
    """
    layer = pyogrio.read_dataframe(shapefile_path)

    # The scaler works on 2-D input, hence the one-column frame selection
    trig_counts = layer[['Trig_count']]
    layer[output_field] = MinMaxScaler().fit_transform(trig_counts)

    kept_columns = ['value', output_field]
    return layer[kept_columns]

# Source shapefile -> name of the normalized column it provides
# (the RS counts come from the "RS1" shapefile)
shapefiles = {
    f"{base_path}su05__LS_{file_suffix}.shp": f"DensNor_{type_code}"
    for file_suffix, type_code in (
        ("DS", "DS"),
        ("DF", "DF"),
        ("ES", "ES"),
        ("EF", "EF"),
        ("RS1", "RS"),
    )
}

# Normalized tables, keyed by the name of the normalized column
normalized_data = {}
for shapefile, output_field in shapefiles.items():
    normalized_table = normalize_trig_count(shapefile, output_field)
    normalized_data[output_field] = normalized_table

# Probability shapefiles that receive the normalized columns
dest_shapefiles = [
    f"{base_path}allProbs_{configuration}.shp"
    for configuration in (
        "daily_75",
        "terrain_allCum",
        "terrain_3wCum",
        "terrain_2wCum",
        "terrain_1wCum",
        "terrain_MonCum",
        "terrain",
    )
]

# Function to add normalized fields and calculate differences
def add_fields_and_calculate_diff(dest_shapefile, normalized_data):
    """Join the normalized counts onto a shapefile and store the differences.

    Every table of ``normalized_data`` is left-joined on the 'value' column.
    Then, for each landslide type, ``<type>_diff_GT`` is computed as the type
    column minus its normalized column. The shapefile is overwritten in place.

    Args:
        dest_shapefile: Path of the shapefile to read and overwrite.
        normalized_data: Mapping from normalized column name to a table with
            the columns 'value' and that normalized column. The entries must
            be ordered like the types DS, DF, ES, EF, RS.
    """
    # Read the destination shapefile into a GeoDataFrame
    dest_gdf = pyogrio.read_dataframe(dest_shapefile)
    print(f"Initial columns in {dest_shapefile}: {dest_gdf.columns.tolist()}")

    # Merge the normalized data based on the 'value' field
    for norm_field, norm_gdf in normalized_data.items():
        print(f"Merging {norm_field} into {dest_shapefile}")
        # The file already holds this column when the function has been run
        # before. Drop the stale copy so the merge brings in the fresh values
        # instead of creating "_x"/"_y" suffixed duplicates.
        if norm_field in dest_gdf.columns:
            dest_gdf = dest_gdf.drop(columns=[norm_field])
        dest_gdf = dest_gdf.merge(norm_gdf, on='value', how='left')
        print(f"Columns after merging {norm_field}: {dest_gdf.columns.tolist()}")

    # Calculate the difference fields. The normalized column names are taken
    # from the argument rather than from a notebook-level variable, so the
    # function only depends on what it is given.
    for original_field, norm_field in zip(['DS', 'DF', 'ES', 'EF', 'RS'], normalized_data):
        diff_field = f"{original_field}_diff_GT"
        if norm_field in dest_gdf.columns:
            dest_gdf[diff_field] = dest_gdf[original_field] - dest_gdf[norm_field]
            print(f"Calculated {diff_field} in {dest_shapefile}")
        else:
            print(f"Error: {norm_field} not found in {dest_shapefile} during calculation of {diff_field}")

    # Save the updated GeoDataFrame back to the same shapefile
    pyogrio.write_dataframe(dest_gdf, dest_shapefile)

# Apply the process to all destination shapefiles.
# Each shapefile is rewritten in place with the merged normalized columns
# and the computed *_diff_GT columns.
for dest_shapefile in dest_shapefiles:
    add_fields_and_calculate_diff(dest_shapefile, normalized_data)


In [None]:
import pyogrio
import pandas as pd
import numpy as np

# Folder where the probability shapefiles are located
base_path = "Z:/GEOAPP_Synology/Lavori/Twente/Python_twente/ashokdahal-TransformerLandslide-23fdcf6/Results_GIS/"

# Shapefiles holding the *_diff_GT fields, one per configuration
dest_shapefiles = [
    f"{base_path}allProbs_{configuration}.shp"
    for configuration in (
        "daily_75",
        "terrain_allCum",
        "terrain_3wCum",
        "terrain_2wCum",
        "terrain_1wCum",
        "terrain_MonCum",
        "terrain",
    )
]

# Function to compute statistics for the *_diff_GT fields
def compute_statistics(diff_fields):
    """Summarize every column of a table of difference values.

    Missing values (NaN) are ignored by all the statistics, so they all
    describe the same set of values.

    Args:
        diff_fields: DataFrame whose columns are the fields to summarize.

    Returns:
        A dict mapping each column name to a dict with the keys "min", "max",
        "mean", "std_dev", "25_percentile" and "75_percentile".
    """
    stats = {}
    for field in diff_fields:
        values = diff_fields[field]
        stats[field] = {
            "min": values.min(),
            "max": values.max(),
            "mean": values.mean(),
            "std_dev": values.std(),
            # Series.quantile skips NaN like the statistics above, whereas
            # np.percentile returns NaN as soon as one value is missing
            "25_percentile": values.quantile(0.25),
            "75_percentile": values.quantile(0.75)
        }
    return stats

# Print the summary statistics of the *_diff_GT fields of every shapefile
for dest_shapefile in dest_shapefiles:
    dest_gdf = pyogrio.read_dataframe(dest_shapefile)

    # Keep only the difference fields and summarize them
    diff_gt_fields = [col for col in dest_gdf.columns if col.endswith('_diff_GT')]
    diff_data = dest_gdf[diff_gt_fields]
    stats = compute_statistics(diff_data)

    # Assemble the report line by line, then print it in one go
    report_lines = [f"Statistics for {dest_shapefile}:"]
    for field, field_stats in stats.items():
        report_lines.append(f"Field: {field}")
        report_lines.extend(
            f"  {stat_name}: {value}" for stat_name, value in field_stats.items()
        )
    print("\n".join(report_lines))
    print("\n")  # blank lines between shapefiles for readability

In [None]:
import pyogrio
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Folder where the probability shapefiles are located
base_path = "Z:/GEOAPP_Synology/Lavori/Twente/Python_twente/ashokdahal-TransformerLandslide-23fdcf6/Results_GIS/"

# Shapefiles to evaluate, one per configuration
configuration_names = (
    "daily_75",
    "terrain_allCum",
    "terrain_3wCum",
    "terrain_2wCum",
    "terrain_1wCum",
    "terrain_MonCum",
    "terrain",
)
dest_shapefiles = [base_path + "allProbs_" + name + ".shp" for name in configuration_names]

# Function to compute error metrics
def compute_error_metrics(pred, obs):
    """Compute error metrics between predicted and observed values.

    Only the pairs where both values are present (not NaN) are used, so every
    metric is computed on the same sample.

    Args:
        pred: Predicted values (array-like or Series).
        obs: Observed values, same length as ``pred``.

    Returns:
        A dict with the keys "MAE" (mean absolute error), "RMSE" (root mean
        squared error), "Bias" (mean of pred - obs), "RAE" and "RSE" (absolute
        and squared error relative to always predicting the mean of ``obs``).
        RAE and RSE are inf or NaN when ``obs`` has no spread; every metric is
        NaN when no valid pair exists.
    """
    pred_values = np.asarray(pred, dtype=float)
    obs_values = np.asarray(obs, dtype=float)

    # Drop the pairs with a missing prediction or observation
    valid = ~(np.isnan(pred_values) | np.isnan(obs_values))
    pred_values = pred_values[valid]
    obs_values = obs_values[valid]

    if pred_values.size == 0:
        return {name: float("nan") for name in ("MAE", "RMSE", "Bias", "RAE", "RSE")}

    errors = pred_values - obs_values
    abs_errors = np.abs(errors)
    squared_errors = errors ** 2
    obs_deviation = obs_values - obs_values.mean()

    mae = abs_errors.mean()
    rmse = np.sqrt(squared_errors.mean())
    bias = errors.mean()

    # A constant obs gives a zero denominator: keep the inf/NaN result, but
    # without emitting a runtime warning
    with np.errstate(divide="ignore", invalid="ignore"):
        rae = abs_errors.sum() / np.abs(obs_deviation).sum()
        rse = squared_errors.sum() / (obs_deviation ** 2).sum()

    return {
        "MAE": mae,
        "RMSE": rmse,
        "Bias": bias,
        "RAE": rae,
        "RSE": rse
    }

# Error metrics per shapefile and per landslide type:
# metrics_summary[shapefile][type] -> dict of metrics
metrics_summary = {}

for dest_shapefile in dest_shapefiles:
    # Register the shapefile first, so it is listed even without any metric
    metrics_summary[dest_shapefile] = {}

    dest_gdf = pyogrio.read_dataframe(dest_shapefile)

    # The *_diff_GT fields tell which landslide types were processed
    diff_gt_fields = [col for col in dest_gdf.columns if col.endswith('_diff_GT')]

    for diff_field in diff_gt_fields:
        pred_field = diff_field.replace("_diff_GT", "")  # e.g., DS_diff_GT -> DS
        observed_field = "DensNor_" + pred_field  # e.g., DS -> DensNor_DS

        # Report every column that is missing and skip this landslide type
        missing_fields = [
            name for name in (pred_field, observed_field)
            if name not in dest_gdf.columns
        ]
        if missing_fields:
            for name in missing_fields:
                print(f"Warning: {name} not found in {dest_shapefile}")
            continue

        # Predicted probabilities against the observed (normalized) values
        pred = dest_gdf[pred_field]
        obs = dest_gdf[observed_field]
        metrics_summary[dest_shapefile][pred_field] = compute_error_metrics(pred, obs)

# Average every metric over the landslide types of each shapefile
average_metrics = {}
for shapefile, landslide_types in metrics_summary.items():
    type_metrics = list(landslide_types.values())
    averaged = {}
    for metric_name in ("MAE", "RMSE", "Bias", "RAE", "RSE"):
        averaged[f"Avg {metric_name}"] = np.mean([entry[metric_name] for entry in type_metrics])
    average_metrics[shapefile] = averaged

# Find the best shapefile overall: lowest average RMSE, ties broken by average MAE
best_shapefile_overall = min(average_metrics, key=lambda k: (average_metrics[k]["Avg RMSE"], average_metrics[k]["Avg MAE"]))
print(f"Best overall shapefile: {best_shapefile_overall} with metrics: {average_metrics[best_shapefile_overall]}")

# Find the best shapefile for each landslide type (lowest RMSE, then lowest MAE)
best_shapefiles_per_type = {}
for landslide_type in ["DS", "DF", "ES", "EF", "RS"]:
    # Only the shapefiles that have metrics for this type can be compared;
    # looking the type up in the others would raise a KeyError
    candidates = [shp for shp in metrics_summary if landslide_type in metrics_summary[shp]]
    if not candidates:
        print(f"Warning: no metrics available for {landslide_type}")
        continue
    best_shapefile = min(candidates, key=lambda k: (metrics_summary[k][landslide_type]["RMSE"], metrics_summary[k][landslide_type]["MAE"]))
    best_shapefiles_per_type[landslide_type] = best_shapefile

print("Best shapefile per landslide type:")
for landslide_type, shapefile in best_shapefiles_per_type.items():
    print(f"  {landslide_type}: {shapefile} with metrics: {metrics_summary[shapefile][landslide_type]}")



In [None]:
# Find the best shapefile overall: lowest average RMSE, ties broken by the
# average bias closest to zero (the bias is signed, so its magnitude is used)
best_shapefile_overall = min(average_metrics, key=lambda k: (average_metrics[k]["Avg RMSE"], abs(average_metrics[k]["Avg Bias"])))
print(f"Best overall shapefile: {best_shapefile_overall} with metrics: {average_metrics[best_shapefile_overall]}")

# Find the best shapefile for each landslide type (lowest RMSE, then smallest |Bias|)
best_shapefiles_per_type = {}
for landslide_type in ["DS", "DF", "ES", "EF", "RS"]:
    # Only the shapefiles that have metrics for this type can be compared;
    # looking the type up in the others would raise a KeyError
    candidates = [shp for shp in metrics_summary if landslide_type in metrics_summary[shp]]
    if not candidates:
        print(f"Warning: no metrics available for {landslide_type}")
        continue
    best_shapefile = min(candidates, key=lambda k: (metrics_summary[k][landslide_type]["RMSE"], abs(metrics_summary[k][landslide_type]["Bias"])))
    best_shapefiles_per_type[landslide_type] = best_shapefile

print("Best shapefile per landslide type:")
for landslide_type, shapefile in best_shapefiles_per_type.items():
    print(f"  {landslide_type}: {shapefile} with metrics: {metrics_summary[shapefile][landslide_type]}")

In [None]:
# Normalize the metrics and calculate the composite score for each landslide type
composite_metric_names = ["MAE", "RMSE", "Bias", "RAE", "RSE"]
composite_scores_per_type = {lt: {} for lt in ["DS", "DF", "ES", "EF", "RS"]}

for landslide_type in ["DS", "DF", "ES", "EF", "RS"]:
    # Extract metrics for this landslide type across all shapefiles that have them
    metrics_per_type = {
        shp: metrics_summary[shp][landslide_type]
        for shp in dest_shapefiles
        if landslide_type in metrics_summary.get(shp, {})
    }
    if not metrics_per_type:
        print(f"Warning: no metrics available for {landslide_type}")
        continue

    # Min-max normalize every metric across the shapefiles (0 = best, 1 = worst).
    # The normalized values are stored next to the raw ones as "norm_<metric>".
    for metric in composite_metric_names:
        # The bias is signed: an unbiased result is 0, so its magnitude is ranked
        raw_values = {
            shp: abs(metrics[metric]) if metric == "Bias" else metrics[metric]
            for shp, metrics in metrics_per_type.items()
        }
        min_value, max_value = min(raw_values.values()), max(raw_values.values())
        for shp, raw_value in raw_values.items():
            norm_value = (raw_value - min_value) / (max_value - min_value) if max_value != min_value else 0
            metrics_per_type[shp][f"norm_{metric}"] = norm_value

    # Composite score: mean of the normalized metrics
    for shp in metrics_per_type:
        composite_score = np.mean([metrics_per_type[shp][f"norm_{metric}"] for metric in composite_metric_names])
        composite_scores_per_type[landslide_type][shp] = composite_score

# Identify the best shapefile for each landslide type based on the composite score
best_shapefiles_per_type = {}
for landslide_type, scores in composite_scores_per_type.items():
    if not scores:
        continue  # no shapefile could be scored for this type
    best_shapefile = min(scores, key=scores.get)
    best_shapefiles_per_type[landslide_type] = best_shapefile

# Print the best shapefile for each landslide type
print("Best shapefile per landslide type based on composite score:")
for landslide_type, shapefile in best_shapefiles_per_type.items():
    print(f"  {landslide_type}: {shapefile} with composite score: {composite_scores_per_type[landslide_type][shapefile]}")