In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
import matplotlib
import seaborn as sns
from matplotlib import colormaps as cm
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from sklearn.preprocessing import MinMaxScaler

# From here on is for IG

In [None]:
# IG all means
import numpy as np
import matplotlib.pyplot as plt
import mpl_axes_aligner

# Landslide types handled in this cell; each label selects one score file and one prediction file.
landslide_types = ('DS', 'DF', 'ES', 'EF', 'RS')

# Load the IG scores of every landslide type
data_dict = {
    label: np.load(f"ExplainedGradients/all_scores_{label}_daily_75_IG.npy")
    for label in landslide_types
}

# Load the susceptibility predictions of every landslide type
preds_dict = {
    label: np.load(f"Data/{label}_daily_75_pred.npy")
    for label in landslide_types
}

# Load the daily rainfall data
rainfall_data = np.load("Data/daily_rain.npy")

# x positions of the 31 daily time steps. Nothing is inverted here: the plotting
# function below relabels the ticks so that position 0 reads "30" and position 30 reads "0".
t = range(31)

def plot_daily_scores_and_rainfall(data_dict, preds_dict, rainfall_data):
    """Plot mean daily rainfall against mean daily IG scores of susceptible rows.

    For each landslide type, the rows whose prediction in column 0 of
    ``preds_dict[label]`` is greater than 0.5 are selected. Scores and rainfall
    of those rows are averaged per day. The rainfall curve is the unweighted
    mean of the per-type rainfall means. Types without any selected row are
    left out of the figure.

    Parameters
    ----------
    data_dict : dict
        Landslide label -> 3-D score array; only ``data[:, :, 0]`` is used
        (assumed layout: locations x days x 1 -- TODO confirm).
    preds_dict : dict
        Landslide label -> 2-D prediction array; column 0 is thresholded.
    rainfall_data : numpy.ndarray
        2-D array indexed with the same row indices as the predictions
        (assumed layout: locations x days -- TODO confirm).

    Raises
    ------
    ValueError
        If an array has an unexpected number of dimensions, or if no landslide
        type has a single row above the 0.5 threshold.

    Notes
    -----
    Uses the module-level ``t`` as x positions, changes the global Matplotlib
    font settings, and writes a PNG and a PDF file into ``Plots/``.
    """
    # Colour and dash pattern of each landslide type
    line_styles = {
        'DS': {'color': 'red', 'linestyle': '-.'},
        'DF': {'color': 'orange', 'linestyle': '-.'},
        'ES': {'color': 'green', 'linestyle': '-.'},
        'EF': {'color': 'black', 'linestyle': '-.'},
        'RS': {'color': 'purple', 'linestyle': '-.'}
    }

    # Per-type daily means of the IG scores and of the rainfall of the selected rows
    mean_daily_scores = {}
    filtered_rainfall_means = {}

    for label, data in data_dict.items():
        pred = preds_dict[label][:, 0]  # first column of the prediction array

        if pred.ndim != 1 or data.ndim != 3:
            raise ValueError(f"Shape mismatch: pred should be 1D and data should be 3D. Got pred.shape={pred.shape}, data.shape={data.shape}")

        # Row indices whose susceptibility is greater than 0.5
        selected_indices = np.where(pred > 0.5)[0]

        # Keep the selected rows and drop the trailing axis
        filtered_data = data[selected_indices, :, 0]

        # Nothing selected for this type: leave it out of the figure
        if filtered_data.size == 0:
            continue

        mean_daily_scores[label] = np.mean(filtered_data, axis=0)
        filtered_rainfall_means[label] = np.mean(rainfall_data[selected_indices, :], axis=0)

    # Without this guard the averaging below yields NaN and the plot call fails
    # with an unrelated "x and y must have same first dimension" error.
    if not mean_daily_scores:
        raise ValueError("No landslide type has any row with susceptibility > 0.5; nothing to plot.")

    # Unweighted mean of the per-type rainfall means
    all_filtered_rainfall = np.array(list(filtered_rainfall_means.values()))
    daily_filtered_rainfall_mean = np.mean(all_filtered_rainfall, axis=0)

    # Initial y-axis limits (the alignment step at the end may adjust them)
    ax1_min, ax1_max = 0, 140  # Rainfall range
    ax2_min, ax2_max = -0.5, 1  # IG score range

    # rc settings only affect text created afterwards, so the font has to be
    # configured before the figure exists; otherwise the first run of the cell
    # renders differently from a re-run.
    plt.rc('font', family='Times New Roman', size=16)

    fig, ax1 = plt.subplots(figsize=(10, 6), dpi=600)

    # Rainfall on the primary (left) y-axis
    ax1.plot(t, daily_filtered_rainfall_mean, label='Filtered daily rainfall (mm)', color='blue', linestyle='-', linewidth=2, zorder=3)
    ax1.set_xlabel('Days Before Event', fontsize=16)
    ax1.set_ylabel('Daily Rainfall (mm)', color='blue', fontsize=16)
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.set_ylim(ax1_min, ax1_max)
    ax1.set_yticks(np.arange(ax1_min, ax1_max + 35, 35))

    # IG scores on the secondary (right) y-axis
    ax2 = ax1.twinx()

    # Grey zero line for the IG scores, drawn below the score curves
    ax2.axhline(y=0, color='grey', linestyle='-', linewidth=2, zorder=1)

    for label, mean_daily_score in mean_daily_scores.items():
        ax2.plot(t, mean_daily_score, label=f'Filtered {label} IG', **line_styles[label], linewidth=1.5, zorder=3)

    ax2.set_ylabel('IG Scores', color='black', fontsize=16)
    ax2.tick_params(axis='y', labelcolor='black')
    ax2.set_ylim(ax2_min, ax2_max)
    ax2.set_yticks(np.arange(ax2_min, ax2_max + 0.25, 0.25))

    # Full grid from the score axis, dashed vertical lines from the rainfall axis
    ax2.grid(True, axis='both', linestyle='-', linewidth=1)
    ax1.grid(True, axis='x', linestyle='--', linewidth=1)

    ax1.legend(loc='upper center', fontsize=16)
    ax2.legend(loc='upper left', fontsize=16)

    # The axis itself keeps its normal orientation; only the tick labels are
    # reversed so that position 0 reads "30" and position 30 reads "0".
    # (A former invert_xaxis() call was immediately undone by set_xlim(0, 30).)
    ax1.set_xticks(np.arange(0, 31, 1))
    ax1.set_xticklabels(np.arange(30, -1, -1), fontsize=16)
    ax1.set_xlim(0, 30)

    # Put y = 0 of both y-axes at the same height
    mpl_axes_aligner.align.yaxes(ax1, 0, ax2, 0)

    plt.tight_layout()

    plt.savefig("Plots/IG_Rainfall_and_Scores_only_means_daily.png")
    plt.savefig("Plots/IG_Rainfall_and_Scores_only_means_daily.pdf")

    plt.show()

# Draw and save the daily IG figure; the > 0.5 susceptibility filter is applied inside the function
plot_daily_scores_and_rainfall(data_dict, preds_dict, rainfall_data)


In [None]:
# IG all means but cumulative (not clear)
import numpy as np
import matplotlib.pyplot as plt

# Landslide types handled in this cell; each label selects one score file and one prediction file.
landslide_types = ('DS', 'DF', 'ES', 'EF', 'RS')

# Load the IG scores of every landslide type
data_dict = {
    label: np.load(f"ExplainedGradients/all_scores_{label}_daily_75_IG.npy")
    for label in landslide_types
}

# Load the susceptibility predictions of every landslide type
preds_dict = {
    label: np.load(f"Data/{label}_daily_75_pred.npy")
    for label in landslide_types
}

# Load the daily rainfall data
rainfall_data = np.load("Data/daily_rain.npy")

# x positions of the 31 daily time steps. Nothing is inverted here: the plotting
# function below relabels the ticks so that position 0 reads "30" and position 30 reads "0".
t = range(31)

def plot_cumulative_scores_and_rainfall(data_dict, preds_dict, rainfall_data):
    """Plot cumulative mean rainfall against cumulative mean IG scores.

    For each landslide type, the rows whose prediction in column 0 of
    ``preds_dict[label]`` is greater than 0.5 are selected. Their scores are
    accumulated along the day axis and then averaged over rows. The rainfall
    curve is the running sum of the unweighted mean of the per-type daily
    rainfall means. Types without any selected row are left out.

    Parameters
    ----------
    data_dict : dict
        Landslide label -> 3-D score array; only ``data[:, :, 0]`` is used
        (assumed layout: locations x days x 1 -- TODO confirm).
    preds_dict : dict
        Landslide label -> 2-D prediction array; column 0 is thresholded.
    rainfall_data : numpy.ndarray
        2-D array indexed with the same row indices as the predictions
        (assumed layout: locations x days -- TODO confirm).

    Raises
    ------
    ValueError
        If an array has an unexpected number of dimensions, or if no landslide
        type has a single row above the 0.5 threshold.

    Notes
    -----
    Uses the module-level ``t`` as x positions and changes the global
    Matplotlib font settings. The figure is shown but not saved.
    """
    # Colour and dash pattern of each landslide type
    line_styles = {
        'DS': {'color': 'red', 'linestyle': '-.'},
        'DF': {'color': 'orange', 'linestyle': '-.'},
        'ES': {'color': 'green', 'linestyle': '-.'},
        'EF': {'color': 'black', 'linestyle': '-.'},
        'RS': {'color': 'purple', 'linestyle': '-.'}
    }

    # Per-type mean cumulative IG scores and mean daily rainfall of the selected rows
    mean_cumulative_scores = {}
    filtered_rainfall_means = {}

    for label, data in data_dict.items():
        pred = preds_dict[label][:, 0]  # first column of the prediction array

        if pred.ndim != 1 or data.ndim != 3:
            raise ValueError(f"Shape mismatch: pred should be 1D and data should be 3D. Got pred.shape={pred.shape}, data.shape={data.shape}")

        # Row indices whose susceptibility is greater than 0.5
        selected_indices = np.where(pred > 0.5)[0]

        # Keep the selected rows and drop the trailing axis
        filtered_data = data[selected_indices, :, 0]

        # Nothing selected for this type: leave it out of the figure
        if filtered_data.size == 0:
            continue

        # Running sum along the day axis per row, then mean over rows
        cumulative_data = np.cumsum(filtered_data, axis=1)
        mean_cumulative_scores[label] = np.mean(cumulative_data, axis=0)

        filtered_rainfall_means[label] = np.mean(rainfall_data[selected_indices, :], axis=0)

    # Without this guard the averaging below yields NaN and the plot call fails
    # with an unrelated "x and y must have same first dimension" error.
    if not mean_cumulative_scores:
        raise ValueError("No landslide type has any row with susceptibility > 0.5; nothing to plot.")

    # Unweighted mean of the per-type rainfall means, then its running sum
    all_filtered_rainfall = np.array(list(filtered_rainfall_means.values()))
    daily_filtered_rainfall_mean = np.mean(all_filtered_rainfall, axis=0)
    cumulative_rainfall_mean = np.cumsum(daily_filtered_rainfall_mean)

    # rc settings only affect text created afterwards, so the font has to be
    # configured before the figure exists; otherwise the first run of the cell
    # renders differently from a re-run.
    plt.rc('font', family='Times New Roman', size=16)

    fig, ax1 = plt.subplots(figsize=(10, 6), dpi=600)

    # Cumulative rainfall on the primary (left) y-axis
    ax1.plot(t, cumulative_rainfall_mean, label='Filtered cumulative rainfall (mm)', color='blue', linestyle='-', linewidth=2)
    ax1.set_xlabel('Days Before Event')
    ax1.set_ylabel('Cumulative rainfall (mm)', color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.set_ylim(0, 450)  # cumulative rainfall range 0-450 mm
    ax1.set_yticks(np.arange(0, 451, 50))  # ticks every 50 mm

    # Cumulative IG scores on the secondary (right) y-axis
    ax2 = ax1.twinx()
    ax2.axhline(y=0, color='grey', linestyle='-', linewidth=2)
    for label, mean_cumulative_score in mean_cumulative_scores.items():
        ax2.plot(t, mean_cumulative_score, label=f'Filtered {label} IG', **line_styles[label], linewidth=1.5)

    ax2.set_ylabel('IG Scores', color='black')
    ax2.tick_params(axis='y', labelcolor='black')
    ax2.set_ylim(-0.75, 0.75)
    ax2.set_yticks(np.arange(-0.75, 0.76, 0.15))  # ticks every 0.15

    # Full grid from the score axis, dashed vertical lines from the rainfall axis
    ax2.grid(True, axis='both', linestyle='-', linewidth=1)
    ax1.grid(True, axis='x', linestyle='--', linewidth=1)

    ax1.legend(loc='upper left', fontsize=16)
    ax2.legend(loc='lower left', fontsize=16)

    # The axis itself keeps its normal orientation; only the tick labels are
    # reversed so that position 0 reads "30" and position 30 reads "0".
    # (A former invert_xaxis() call was immediately undone by set_xlim(0, 30).)
    ax1.set_xticks(np.arange(0, 31, 1))
    ax1.set_xticklabels(np.arange(30, -1, -1))
    ax1.set_xlim(0, 30)

    plt.tight_layout()

    plt.show()

# Draw the cumulative IG figure; the > 0.5 susceptibility filter is applied inside the function
plot_cumulative_scores_and_rainfall(data_dict, preds_dict, rainfall_data)


In [None]:
# IG different plots with means and 90-10 percentile CUMULATIVE VALUES
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Landslide types handled in this cell; each label selects one score file and one prediction file.
landslide_types = ('DS', 'DF', 'ES', 'EF', 'RS')

# Load the daily IG scores (not squeezed; they are accumulated further down)
data_dict = {
    label: np.load(f"ExplainedGradients/all_scores_{label}_daily_75_IG.npy")
    for label in landslide_types
}

# Load the susceptibility predictions (not squeezed)
preds_dict = {
    label: np.load(f"Data/{label}_daily_75_pred.npy")
    for label in landslide_types
}

# Load the daily rainfall data
rainfall_data = np.load("Data/daily_rain.npy")

# x positions of the 31 daily time steps
t = range(31)

# Mean rainfall per day over ALL rows (no susceptibility filter, no percentiles)
# and its running sum
mean_rainfall = np.mean(rainfall_data, axis=0)
cumulative_mean_rainfall = np.cumsum(mean_rainfall)

# Set font globally to Times New Roman (before any figure is created)
plt.rcParams.update({'font.family': 'Times New Roman'})

# One subplot per landslide type, stacked vertically with a shared x-axis
fig, axes = plt.subplots(len(data_dict), 1, figsize=(10, 6 * len(data_dict)), dpi=600, sharex=True)

# Define a function to shift the secondary y-axis ticks so that 0 aligns with -1.5 of the primary axis
def shift_yaxis_to_negative(ax1, ax2, y1_range=(-1.5, 1.6), y2_range=(0, 300), target_y1=-1.5):
    """Relabel the y ticks of ``ax2`` with a constant offset, rounded to integers.

    The offset is the ``y2_range`` value that sits at the same relative height
    as ``target_y1`` does inside ``y1_range``. Every tick of ``ax2`` is then
    labelled ``tick - offset`` with no decimals. Only the labels change; the
    axis limits and the plotted data are untouched.

    With the default arguments ``target_y1`` equals the lower end of
    ``y1_range``, so the offset is 0 and ticks are simply shown as integers.

    Parameters
    ----------
    ax1 : object
        Unused; kept so existing calls keep working.
    ax2 : matplotlib Axes
        Axes whose y-axis major formatter is replaced.
    y1_range, y2_range : tuple
        ``(low, high)`` pairs used only to compute the offset; they are not
        read from the axes.
    target_y1 : float
        Value on the ``y1_range`` scale that should be labelled 0 on ``ax2``.

    Raises
    ------
    ValueError
        If ``y1_range`` has zero width (the relative position is undefined).
    """
    y1_span = y1_range[1] - y1_range[0]
    if y1_span == 0:
        raise ValueError("y1_range must have a non-zero width")
    y2_span = y2_range[1] - y2_range[0]

    # Relative height of target_y1 inside y1_range, mapped onto y2_range
    shift_factor = (target_y1 - y1_range[0]) / y1_span
    new_y2_zero = y2_range[0] + shift_factor * y2_span

    def shifted_ticks(x, pos):
        # The former conditional had two identical branches; one expression suffices.
        return f'{x - new_y2_zero:.0f}'

    ax2.yaxis.set_major_formatter(FuncFormatter(shifted_ticks))

# Plot cumulative results for each landslide type, one subplot per type
for i, (label, data) in enumerate(data_dict.items()):
    ax2 = axes[i]  # host axes of this subplot: rainfall, y-axis drawn on the left

    pred = preds_dict[label][:, 0]  # first column of the prediction array

    # pred must be 1-D and data 3-D (only data[:, :, 0] is used below)
    if pred.ndim != 1 or data.ndim != 3:
        raise ValueError(f"Shape mismatch: pred should be 1D and data should be 3D. Got pred.shape={pred.shape}, data.shape={data.shape}")

    # Row indices whose susceptibility is greater than 0.5
    selected_indices = np.where(pred > 0.5)[0]

    # Keep the selected rows and drop the trailing axis
    filtered_data = data[selected_indices, :, 0]

    # Nothing selected for this type: leave the subplot empty
    if filtered_data.size == 0:
        continue

    # Accumulate along the day axis per row FIRST, then summarise across rows.
    # Percentiles are not additive: a running sum of per-day percentiles is not
    # a percentile of the cumulative values, so the band has to be computed on
    # the cumulative series. (The mean is unaffected by the order.)
    cumulative_data = np.cumsum(filtered_data, axis=1)
    cumulative_mean_scores = np.mean(cumulative_data, axis=0)
    cumulative_p10_scores = np.percentile(cumulative_data, 10, axis=0)
    cumulative_p90_scores = np.percentile(cumulative_data, 90, axis=0)

    # twinx() adds a second Axes sharing x whose y-axis is drawn on the right:
    # the IG scores therefore use the right-hand axis.
    ax1 = ax2.twinx()
    ax1.plot(t, cumulative_mean_scores, label=f'Mean IG {label} Score', color='red', linestyle='-', linewidth=1.5)
    ax1.fill_between(t, cumulative_p10_scores, cumulative_p90_scores, color='red', alpha=0.3, label=f'10th-90th IG {label} Score Percentile')

    # Score axis limits and labelling
    ax1.set_ylim(-1.5, 1.6)
    ax1.set_ylabel(f'IG {label} Scores', fontsize=16, color='red')
    ax1.tick_params(axis='y', labelcolor='red')

    # Cumulative mean rainfall (all rows, unfiltered) on the host axes
    ax2.plot(t, cumulative_mean_rainfall, label='Mean Cumulative Rainfall (mm)', color='blue', linestyle='-', linewidth=1.8)

    # Rainfall axis starts at zero
    ax2.set_ylim(0, 270)
    ax2.set_ylabel('Cumulative Rainfall (mm)', fontsize=16, color='blue')
    ax2.tick_params(axis='y', labelcolor='blue')
    ax2.grid(True)

    # Install the tick formatter on the rainfall axis. With the default
    # arguments the label offset is 0, so ticks are only rounded to integers.
    shift_yaxis_to_negative(ax1, ax2)

    # Green dashed zero baseline on both y-axes
    ax1.axhline(0, color='green', linestyle='--', linewidth=0.8)
    ax2.axhline(0, color='green', linestyle='--', linewidth=0.8)

    # One combined legend for both axes
    lines, labels = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines + lines2, labels + labels2, loc='upper left', fontsize=18)

    # The axis keeps its normal orientation; only the tick labels are reversed
    # so that position 0 reads "30" and position 30 reads "0". The former
    # invert_xaxis() calls were cancelled by plt.xlim(0, 30) below.
    ax1.set_xticks(np.arange(0, 31, 1))
    ax1.set_xticklabels(np.arange(30, -1, -1))

# x label only on the bottom subplot (x-axis is shared)
axes[-1].set_xlabel('Days Before Event', fontsize=16)

# Fix the shared x range
plt.xlim(0, 30)

# tight_layout() recomputes the subplot spacing, so an earlier
# subplots_adjust(hspace=0) had no effect on the result and was dropped.
plt.tight_layout()

# Save the figure with all subplots
# plt.savefig("Plots/Cumulative_Rainfall_and_Scores_All_Types_daily_filtered_by_susceptibility.png")
# plt.savefig("Plots/Cumulative_Rainfall_and_Scores_All_Types_daily_filtered_by_susceptibility.pdf")

# Show the plot
plt.show()

# For SHAP GE scores

In [None]:
# Copied and adapted from the IG scripts above to work with the SHAP values

# shap eg all means
import numpy as np
import matplotlib.pyplot as plt
import mpl_axes_aligner

# Landslide types handled in this cell; each label selects one SHAP file and one prediction file.
landslide_types = ('DS', 'DF', 'ES', 'EF', 'RS')

# Load the SHAP values of the time-series input (Xt), dropping singleton axes
data_dict = {
    label: np.squeeze(np.load(f"ExplainedGradients/{label}_shap_values_Xt.npy"))
    for label in landslide_types
}

# Load the susceptibility predictions of every landslide type
preds_dict = {
    label: np.load(f"Data/{label}_daily_75_pred.npy")
    for label in landslide_types
}

# Load the daily rainfall data
rainfall_data = np.load("Data/daily_rain.npy")

# x positions of the 31 daily time steps. Nothing is inverted here: the plotting
# function below relabels the ticks so that position 0 reads "30" and position 30 reads "0".
t = range(31)

def plot_daily_scores_and_rainfall(data_dict, preds_dict, rainfall_data):
    """Plot mean daily rainfall against mean daily SHAP values of susceptible rows.

    SHAP counterpart of the IG function of the same name defined in an earlier
    cell; running this cell rebinds that name. For each landslide type, the
    rows whose prediction in column 0 of ``preds_dict[label]`` is greater than
    0.5 are selected, and their SHAP values and rainfall are averaged per day.
    The rainfall curve is the unweighted mean of the per-type rainfall means.
    Types without any selected row are left out of the figure.

    Parameters
    ----------
    data_dict : dict
        Landslide label -> 2-D or 3-D SHAP array. A 2-D array gets a trailing
        singleton axis; only ``data[:, :, 0]`` is used
        (assumed layout: locations x days [x 1] -- TODO confirm).
    preds_dict : dict
        Landslide label -> 2-D prediction array; column 0 is thresholded.
    rainfall_data : numpy.ndarray
        2-D array indexed with the same row indices as the predictions
        (assumed layout: locations x days -- TODO confirm).

    Raises
    ------
    ValueError
        If an array has an unexpected number of dimensions, or if no landslide
        type has a single row above the 0.5 threshold.

    Notes
    -----
    Uses the module-level ``t`` as x positions, changes the global Matplotlib
    font settings, and writes a PNG and a PDF file into ``Plots/``.
    """
    # Colour and dash pattern of each landslide type
    line_styles = {
        'DS': {'color': 'red', 'linestyle': '-.'},
        'DF': {'color': 'orange', 'linestyle': '-.'},
        'ES': {'color': 'green', 'linestyle': '-.'},
        'EF': {'color': 'black', 'linestyle': '-.'},
        'RS': {'color': 'purple', 'linestyle': '-.'}
    }

    # Per-type daily means of the SHAP values and of the rainfall of the selected rows
    mean_daily_scores = {}
    filtered_rainfall_means = {}

    for label, data in data_dict.items():
        pred = preds_dict[label][:, 0]  # first column of the prediction array

        # The squeezed SHAP arrays may be 2-D; restore a trailing singleton axis
        # so the same indexing as for 3-D arrays applies.
        if data.ndim == 2:
            data = data[:, :, np.newaxis]

        if pred.ndim != 1 or data.ndim != 3:
            raise ValueError(f"Shape mismatch: pred should be 1D and data should be 3D. Got pred.shape={pred.shape}, data.shape={data.shape}")

        # Row indices whose susceptibility is greater than 0.5
        selected_indices = np.where(pred > 0.5)[0]

        # Keep the selected rows and drop the trailing axis
        filtered_data = data[selected_indices, :, 0]

        # Nothing selected for this type: leave it out of the figure
        if filtered_data.size == 0:
            continue

        mean_daily_scores[label] = np.mean(filtered_data, axis=0)
        filtered_rainfall_means[label] = np.mean(rainfall_data[selected_indices, :], axis=0)

    # Without this guard the averaging below yields NaN and the plot call fails
    # with an unrelated "x and y must have same first dimension" error.
    if not mean_daily_scores:
        raise ValueError("No landslide type has any row with susceptibility > 0.5; nothing to plot.")

    # Unweighted mean of the per-type rainfall means
    all_filtered_rainfall = np.array(list(filtered_rainfall_means.values()))
    daily_filtered_rainfall_mean = np.mean(all_filtered_rainfall, axis=0)

    # Initial y-axis limits (the alignment step at the end may adjust them)
    ax1_min, ax1_max = 0, 140  # Rainfall range
    ax2_min, ax2_max = -0.05, 0.25  # SHAP value range

    # rc settings only affect text created afterwards, so the font has to be
    # configured before the figure exists; otherwise the first run of the cell
    # renders differently from a re-run.
    plt.rc('font', family='Times New Roman', size=16)

    fig, ax1 = plt.subplots(figsize=(10, 6), dpi=600)

    # Rainfall on the primary (left) y-axis
    ax1.plot(t, daily_filtered_rainfall_mean, label='Filtered daily rainfall (mm)', color='blue', linestyle='-', linewidth=2, zorder=3)
    ax1.set_xlabel('Days Before Event', fontsize=16)
    ax1.set_ylabel('Daily Rainfall (mm)', color='blue', fontsize=16)
    ax1.tick_params(axis='y', labelcolor='blue')
    ax1.set_ylim(ax1_min, ax1_max)
    ax1.set_yticks(np.arange(ax1_min, ax1_max + 20, 20))

    # SHAP values on the secondary (right) y-axis
    ax2 = ax1.twinx()

    # Grey zero line for the SHAP values, drawn below the curves
    ax2.axhline(y=0, color='grey', linestyle='-', linewidth=2, zorder=1)

    for label, mean_daily_score in mean_daily_scores.items():
        ax2.plot(t, mean_daily_score, label=f'Filtered {label}', **line_styles[label], linewidth=1.5, zorder=3)

    ax2.set_ylabel('SHAP_GE', color='black', fontsize=16)
    ax2.tick_params(axis='y', labelcolor='black')
    ax2.set_ylim(ax2_min, ax2_max)
    ax2.set_yticks(np.arange(ax2_min, ax2_max + 0.1, 0.1))

    # Full grid from the SHAP axis, dashed vertical lines from the rainfall axis
    ax2.grid(True, axis='both', linestyle='-', linewidth=1)
    ax1.grid(True, axis='x', linestyle='--', linewidth=1)

    ax1.legend(loc='upper center', fontsize=16)
    ax2.legend(loc='upper left', fontsize=16)

    # The axis itself keeps its normal orientation; only the tick labels are
    # reversed so that position 0 reads "30" and position 30 reads "0".
    # (A former invert_xaxis() call was immediately undone by set_xlim(0, 30).)
    ax1.set_xticks(np.arange(0, 31, 1))
    ax1.set_xticklabels(np.arange(30, -1, -1), fontsize=16)
    ax1.set_xlim(0, 30)

    # Put y = 0 of both y-axes at the same height
    mpl_axes_aligner.align.yaxes(ax1, 0, ax2, 0)

    plt.tight_layout()

    plt.savefig("Plots/SHAP_GE_Rainfall_and_Scores_only_means_daily.png")
    plt.savefig("Plots/SHAP_GE_Rainfall_and_Scores_only_means_daily.pdf")

    plt.show()

# Draw and save the daily SHAP figure; the > 0.5 susceptibility filter is applied inside the function
plot_daily_scores_and_rainfall(data_dict, preds_dict, rainfall_data)

In [None]:
# SHAP_GE Different Plots with Means and 90-10 Percentile Daily Values
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Landslide types handled in this cell; each label selects one SHAP file and one prediction file.
landslide_types = ('DS', 'DF', 'ES', 'EF', 'RS')

# Load the SHAP values of the time-series input (Xt), dropping singleton axes
data_dict = {
    label: np.squeeze(np.load(f"ExplainedGradients/{label}_shap_values_Xt.npy"))
    for label in landslide_types
}

# Load the susceptibility predictions of every landslide type
preds_dict = {
    label: np.load(f"Data/{label}_daily_75_pred.npy")
    for label in landslide_types
}

# Load the daily rainfall data
rainfall_data = np.load("Data/daily_rain.npy")

# x positions of the 31 daily time steps
t = np.arange(31)

# Set font globally to Times New Roman
plt.rcParams.update({'font.family': 'Times New Roman'})

# Initial y-axis limits read by plot_daily_values below. Mind the naming: inside
# that function the "ax1_*" pair is applied to the rainfall axis (called ax2 there)
# and the "ax2_*" pair to the SHAP axis (called ax1 there).
ax1_min, ax1_max = 0, 170  # Rainfall range
ax2_min, ax2_max = -0.15, 0.30  # SHAP value range

# Function to plot daily results for each landslide type
def plot_daily_values(data_dict, preds_dict, rainfall_data):
    """Plot, per landslide type, daily SHAP values and rainfall of LOW-susceptibility rows.

    One subplot is drawn per entry of ``data_dict``. For each type the rows
    whose prediction in column 0 of ``preds_dict[label]`` is strictly below 0.5
    are selected. The subplot shows the daily mean SHAP value with a 5th-95th
    percentile band (right y-axis) and the daily mean rainfall of the same rows
    (left y-axis). A type without any selected row keeps an empty subplot.

    Parameters
    ----------
    data_dict : dict
        Landslide label -> 2-D or 3-D SHAP array. A 2-D array gets a trailing
        singleton axis; only ``data[:, :, 0]`` is used
        (assumed layout: locations x days [x 1] -- TODO confirm).
    preds_dict : dict
        Landslide label -> 2-D prediction array; column 0 is thresholded.
    rainfall_data : numpy.ndarray
        2-D array indexed with the same row indices as the predictions
        (assumed layout: locations x days -- TODO confirm).

    Raises
    ------
    ValueError
        If an array has an unexpected number of dimensions.

    Notes
    -----
    Reads the module-level ``t``, ``ax1_min``/``ax1_max`` (rainfall limits) and
    ``ax2_min``/``ax2_max`` (SHAP limits), changes the global Matplotlib font
    settings, and writes a PNG and a PDF file into ``Plots/``.
    """
    # This cell does not import the aligner at the top, so import it here
    # instead of relying on an earlier cell having been run.
    import mpl_axes_aligner

    # rc settings only affect text created afterwards: configure the font once,
    # before the figure exists, rather than inside the loop.
    plt.rc('font', family='Times New Roman', size=16)

    fig, axes = plt.subplots(len(data_dict), 1, figsize=(10, 6 * len(data_dict)), dpi=600, sharex=True)
    # With a single landslide type plt.subplots returns a bare Axes, not an array
    axes = np.atleast_1d(axes)

    for ax2, (label, data) in zip(axes, data_dict.items()):
        # ax2 is the host axes of the subplot: rainfall, y-axis drawn on the left

        pred = preds_dict[label][:, 0]  # first column of the prediction array

        # The squeezed SHAP arrays may be 2-D; restore a trailing singleton axis
        if data.ndim == 2:
            data = data[:, :, np.newaxis]

        if pred.ndim != 1 or data.ndim != 3:
            raise ValueError(f"Shape mismatch: pred should be 1D and data should be 3D. Got pred.shape={pred.shape}, data.shape={data.shape}")

        # Row indices whose susceptibility is BELOW 0.5. The output files carry
        # the suffix "min05" -- presumably this low-susceptibility selection is
        # intentional; confirm before reusing the function for the > 0.5 case.
        selected_indices = np.where(pred < 0.5)[0]

        filtered_data = data[selected_indices, :, 0]
        filtered_rainfall = rainfall_data[selected_indices, :]
        if filtered_data.size == 0 or filtered_rainfall.size == 0:
            continue  # nothing selected: leave this subplot empty

        # Daily mean and 5th/95th percentiles of the SHAP values
        mean_scores = np.mean(filtered_data, axis=0)
        p05_scores = np.percentile(filtered_data, 5, axis=0)
        p95_scores = np.percentile(filtered_data, 95, axis=0)

        # Daily mean rainfall of the same rows
        mean_rainfall = np.mean(filtered_rainfall, axis=0)

        # twinx() adds a second Axes sharing x whose y-axis is drawn on the right:
        # the SHAP values use the right-hand axis.
        ax1 = ax2.twinx()
        ax1.plot(t, mean_scores, label=f'Mean SHAP_GE {label}', color='red', linestyle='-', linewidth=1.5)
        ax1.fill_between(t, p05_scores, p95_scores, color='red', alpha=0.3, label='5th-95th Percentile')
        ax1.grid(True, axis='both', linestyle='-', linewidth=1)
        ax1.set_ylabel('SHAP_GE', color='black', fontsize=16)
        ax1.tick_params(axis='y', labelcolor='black')
        ax1.set_ylim(ax2_min, ax2_max)  # SHAP limits (module-level "ax2_*" pair)
        ax1.set_yticks(np.arange(ax2_min, ax2_max + 0.05, 0.05))

        # Rainfall on the host axes
        ax2.plot(t, mean_rainfall, label=f'Mean Rainfall {label} (mm)', color='blue', linestyle='-', linewidth=1.8)
        ax2.set_ylabel('Daily Rainfall (mm)', color='blue', fontsize=16)
        ax2.tick_params(axis='y', labelcolor='blue')
        ax2.set_ylim(ax1_min, ax1_max)  # rainfall limits (module-level "ax1_*" pair)
        ax2.set_yticks(np.arange(ax1_min, ax1_max, 25))
        ax2.grid(True, axis='x', linestyle='--', linewidth=1)

        # Put y = 0 of both y-axes at the same height
        mpl_axes_aligner.align.yaxes(ax1, 0, ax2, 0)

        # Green dashed zero baseline on both y-axes
        ax1.axhline(0, color='green', linestyle='--', linewidth=0.8)
        ax2.axhline(0, color='green', linestyle='--', linewidth=0.8)

        # One combined legend for both axes
        lines, labels = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax1.legend(lines + lines2, labels + labels2, loc='upper left', fontsize=12)

        # The axis keeps its normal orientation; only the tick labels are
        # reversed so that position 0 reads "30" and position 30 reads "0".
        # (A former invert_xaxis() call was immediately undone by set_xlim(0, 30).)
        ax1.set_xticks(np.arange(0, 31, 1))
        ax1.set_xticklabels(np.arange(30, -1, -1), fontsize=16)
        ax1.set_xlim(0, 30)

    # x label only on the bottom subplot (x-axis is shared)
    axes[-1].set_xlabel('Days Before Event', fontsize=16)

    # tight_layout() recomputes the subplot spacing, so an earlier
    # subplots_adjust(hspace=0.2) had no effect on the result and was dropped.
    plt.tight_layout()

    plt.savefig("Plots/SHAP_GE_Daily_Rainfall_min05.png")
    plt.savefig("Plots/SHAP_GE_Daily_Rainfall_min05.pdf")

    plt.show()

# Draw and save the per-type daily SHAP/rainfall panels for rows with susceptibility < 0.5
plot_daily_values(data_dict, preds_dict, rainfall_data)


## For the static predictors

In [5]:
import os

# os.chdir("/home/dahala/Transformer-Gorkha/")
import tensorflow as tf
import json
import numpy as np
import seaborn as sns
from src import preparedata
import matplotlib.pyplot as plt
from tensorflow.keras import layers, optimizers, losses, metrics, Model
from tqdm.notebook import tqdm

import shap

# Load the SHAP values for the static features (Xc).
# Only the uncommented landslide types are plotted; the type enabled here has to
# match the landslide type configured in params/params.json (see below).
data_dict = {
    #'DS': np.squeeze(np.load("ExplainedGradients/DS_shap_values_Xc.npy")),
    #'DF': np.squeeze(np.load("ExplainedGradients/DF_shap_values_Xc.npy")),
    #'ES': np.squeeze(np.load("ExplainedGradients/ES_shap_values_Xc.npy")),
    #'EF': np.squeeze(np.load("ExplainedGradients/EF_shap_values_Xc.npy")),
    'RS': np.squeeze(np.load("ExplainedGradients/RS_shap_values_Xc.npy"))
}

# Names of the static features, passed to SHAP as feature_names
Xc_features = [
    'SaCongl', 'MaCl', 'SaLiPeSh', 'TectoCl', 'Mass', 'ClSiMa', 'SaPe', 'SaIntPe', 'Urb', 'Arboric',
    'Crop', 'Aquatic', 'PermGrass', 'Parks', 'Forest', 'MixAgricu', 'Shrubby', 'Extractiv',
    'RareVeg', 'Wetlands', 'mean_slp', 'std_slp', 'mean_asp', 'std_asp', 'mean_dipdir', 'std_dipdir',
    'mean_dip', 'std_dip', 'mean_plan', 'std_plan', 'mean_prof', 'std_prof'
]

# NOTE(review): the original reminder here read "put inference true" -- presumably
# a setting in params.json; confirm which key is meant.
# CHANGE IN PARAMS THE LANDSLIDE TYPE ACCORDING TO data_dict
# The file is opened in a with-block so the handle is closed right after parsing.
with open("params/params.json", "r") as params_file:
    params = json.load(params_file)
dataset = preparedata.readTransformerData(params["dataprepinargs"])
dataset.preparedata()

# Draw and save one violin plot per landslide type in data_dict
for label, shap_values_Xc in data_dict.items():
    # Wrap the raw SHAP values, the feature values and the feature names in a
    # shap.Explanation object, which is what the shap.plots functions take
    explanation = shap.Explanation(values=shap_values_Xc, data=dataset.Xc, feature_names=Xc_features)

    # Create a violin plot
    #shap.plots.beeswarm(explanation, show=False)
    shap.plots.violin(explanation, show=False)#, plot_type="layered_violin")
    #shap.plots.heatmap(explanation, show=False, max_display=4) # too big

    # Save the plot with the label in the filename
    plt.savefig(f"Plots/violin_static_features_{label}.png", dpi=650, bbox_inches="tight")
    # Close (rather than clear) the figure: clearing leaves an empty figure open,
    # which the notebook then renders as "<Figure ... with 0 Axes>".
    plt.close()




<Figure size 800x950 with 0 Axes>