# Figure Feature Importance

In [None]:
# Libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from palettable.cartocolors.qualitative import Safe_10 as cpal

In [None]:
# Directories
# Input: the per-target feature-importance CSVs are read from the
# 'files_importance' subfolder of this directory.
dir03 = '../paper_deficit/output/03_rf/'
# Output: the finished figure is written to the 'pdf' and 'png' subfolders
# of this directory.
dir06 = '../paper_deficit/output/06_eval/'

---

In [None]:
def load_and_merge_data(list_var_tar, scen):
    """Collect the mean feature importances of several targets in one table.

    For every target in ``list_var_tar`` the file
    ``df_feature_imp_<target>_<scen>.csv`` is read from the
    ``files_importance`` subfolder of ``dir03`` and its ``imp_mean`` column is
    joined onto a common ``var_exp`` column.

    Parameters
    ----------
    list_var_tar : sequence of str
        Target variable names; the first entry supplies the base ``var_exp``
        column.
    scen : str
        Scenario tag used in the file names and in the output column names.

    Returns
    -------
    pandas.DataFrame
        Columns ``var_exp``, one ``<target>_<scen>_imp_mean`` per target, and
        ``var_label`` (second underscore-separated token of ``var_exp`` after
        ``worldclim_bio`` has been rewritten to ``worldclim_clim``).
    """
    importance_dir = os.path.join(dir03, 'files_importance')

    def _csv_path(target):
        # Full path of the importance table for one target variable.
        return os.path.join(importance_dir, f'df_feature_imp_{target}_{scen}.csv')

    # Start from the explanatory-variable names of the first target only.
    merged = pd.read_csv(_csv_path(list_var_tar[0]), usecols=['var_exp'])

    # Attach one importance column per target, keyed on 'var_exp'.
    for target in list_var_tar:
        importance = pd.read_csv(_csv_path(target), usecols=['var_exp', 'imp_mean'])
        importance = importance.rename(
            columns={'imp_mean': f'{target}_{scen}_imp_mean'})
        merged = merged.merge(importance, on='var_exp')

    # Short axis label: rename the worldclim group, then keep the second token.
    renamed = merged['var_exp'].str.replace('worldclim_bio', 'worldclim_clim')
    tokens = renamed.str.split('_')
    merged['var_label'] = tokens.str[1]

    return merged

In [None]:
# Define subplot
def plot_sub(ax, var_tar, scen, title, show_legend=False):
    """Draw grouped feature-importance bars for one carbon pool on ``ax``.

    The pool (``agbc``, ``bgbc`` or ``soc``) is taken from the prefix of
    ``var_tar``; the importances of its ``min``, ``mean`` and ``max`` targets
    are loaded with ``load_and_merge_data`` and drawn side by side for every
    explanatory variable, ordered by decreasing mean-target importance.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw on.
    var_tar : str
        One of ``<pool>_min``, ``<pool>_mean`` or ``<pool>_max`` with
        ``<pool>`` in ``agbc``, ``bgbc``, ``soc``. Only the pool prefix
        affects the plot.
    scen : str
        Scenario tag forwarded to ``load_and_merge_data``.
    title : str
        Subplot title.
    show_legend : bool, optional
        Whether to add a frameless legend (default ``False``).

    Raises
    ------
    ValueError
        If ``var_tar`` is not one of the supported target names.
    """
    pools = ('agbc', 'bgbc', 'soc')
    stats = ('min', 'mean', 'max')

    # Map every supported target name to its pool so that an unknown target
    # fails here with a clear message instead of an unbound local later on.
    pool_by_target = {f'{p}_{s}': p for p in pools for s in stats}
    if var_tar not in pool_by_target:
        raise ValueError(
            f'Unsupported var_tar {var_tar!r}; expected one of '
            f'{sorted(pool_by_target)}')
    pool = pool_by_target[var_tar]
    list_var_tar = [f'{pool}_{s}' for s in stats]

    df_all = load_and_merge_data(list_var_tar, scen)

    # Column names are built from the precomputed pool prefix; nesting the
    # same quote type inside an f-string only parses on Python >= 3.12.
    col_mean = f'{pool}_mean_{scen}_imp_mean'
    col_min = f'{pool}_min_{scen}_imp_mean'
    col_max = f'{pool}_max_{scen}_imp_mean'

    df_all = df_all.sort_values(by=col_mean, ascending=False)

    labels = df_all['var_label']
    x = np.arange(len(labels))

    width = 0.25
    colors = cpal.mpl_colors[:3]

    # Three bars per variable: mean to the left, min centred, max to the right.
    ax.bar(x-width, df_all[col_mean], width=width, label="Mean scenario", color=colors[0])
    ax.bar(x, df_all[col_min], width=width, label="Min scenario", color=colors[1])
    ax.bar(x+width, df_all[col_max], width=width, label="Max scenario", color=colors[2])

    if show_legend:
        ax.legend(frameon=False)
    ax.set_title(title)
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=90, ha='center')

    # Axis styling: open top/right, thicker left/bottom spines and ticks.
    ax.set_ylabel('MDI Feature Importance')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_linewidth(1.5)
    ax.spines['bottom'].set_linewidth(1.5)
    ax.xaxis.set_tick_params(width=1.5)
    ax.yaxis.set_tick_params(width=1.5)

In [None]:
# Plot
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(12, 12), dpi=600)
axs = axs.ravel()

# One row per carbon pool, one column per land assumption (prim / secd).
panels = [
    ('agbc_max', 'prim', 'AGBC - Pristine land assumption'),
    ('agbc_max', 'secd', 'AGBC - Low human influence assumption'),
    ('bgbc_max', 'prim', 'BGBC - Pristine land assumption'),
    ('bgbc_max', 'secd', 'BGBC - Low human influence assumption'),
    ('soc_mean', 'prim', 'SOC 0-30 cm - Pristine land assumption'),
    ('soc_mean', 'secd', 'SOC 0-30 cm - Low human influence assumption'),
]

for idx, (panel_var, panel_scen, panel_title) in enumerate(panels):
    # Only the first panel carries the legend.
    plot_sub(axs[idx], panel_var, panel_scen, panel_title, idx == 0)

plt.tight_layout()

# Export
for rel_path in ('pdf/figs15_feature_importance.pdf',
                 'png/figs15_feature_importance.png'):
    plt.savefig(os.path.join(dir06, rel_path), dpi=600)