# Landcover Evaluation Results

## Initialisation

In [None]:
import json
import numpy as np
import os
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerTuple
from matplotlib.legend import Legend
from matplotlib.container import BarContainer

import initialise
import common
from analysis_utils import calc_statistics
from display_utils import display_frames

In [None]:
# Evaluation scenarios, in the order used by every table and figure below.
SCENARIOS = ['within-site', 'out-of-site']
DISPLAY_NAMES = [' '.join((scenario.capitalize(), 'Models')) for scenario in SCENARIOS]

# Input and output locations, all rooted at directories defined in `common`.
samples_file = os.path.join(common.DATASETS_DIR, 'samples_365days.csv')
model_dir1, model_dir2 = (
    os.path.join(common.MODELS_DIR, f'{scenario}_models') for scenario in SCENARIOS
)
output_dir = common.FIGURES_DIR

In [None]:
# Reference RMSE / R2 values for each scenario; the figure cells pass them as
# `xline` so they are drawn as dashed vertical reference lines.
# NOTE(review): presumably the overall scores of each scenario's models - confirm the source.
WITHIN_SITE_RMSE = 20.9
OUT_OF_SITE_RMSE = 25.4
WITHIN_SITE_R2 = 0.70
OUT_OF_SITE_R2 = 0.54
# Plot styling shared by the figure cells: font sizes, marker size, line width
# and the colour saturation handed to seaborn.
TEXT_SIZE = 7
FONT_SIZE = 8
TITLE_SIZE = 10
MARKER_SIZE = 7
LINE_WIDTH = 1
SATURATION = 0.6

In [None]:
# Model and ensemble configuration read from the shared `common` module.
# `model` and `ensemble_size` are used to build the prediction file names when loading.
model = common.ANALYSIS_MODEL
ensemble_size = common.ENSEMBLE_SIZE
ensemble_runs = common.ENSEMBLE_RUNS  # NOTE(review): not referenced again in the visible cells

### Load Samples and Predictions

In [None]:
# Parameters saved with the within-site models.
params_path = os.path.join(model_dir1, 'model_params.json')
with open(params_path, 'r') as params_file:
    ws_params = json.load(params_file)

# Both scenarios store their ensemble predictions under the same file name.
predictions_name = f'ensemble{ensemble_size}_{model}.csv'
all_samples = pd.read_csv(samples_file, index_col=0)

# Within-site: keep only the samples that have predictions, in prediction order.
predict1 = pd.read_csv(os.path.join(model_dir1, predictions_name), index_col=0)
samples1 = all_samples.reindex(predict1.index)

# Out-of-site: align the predictions to the full sample index.
predict2 = pd.read_csv(os.path.join(model_dir2, predictions_name), index_col=0)
predict2 = predict2.reindex(all_samples.index)

### Create Land Cover groups

In [None]:
# Sorted list of the distinct land cover class names found in the samples.
# The integer lists in `landcover_groups` are positions in this sorted list.
land_cover = sorted(all_samples['Land Cover'].unique())

# Coarse land cover categories, each given as positions into `land_cover`.
# NOTE(review): the positions assume at least 17 distinct classes in a fixed
# sorted order - confirm against the samples file if the dataset changes.
landcover_groups = {
    'Agriculture': [0, 3, 5],
    'Forest': [6, 9, 11, 12, 13, 14],
    'Grassland': [1, 2, 4],
    'Shrubland': [7],
    'Other': [8, 10, 15, 16],
}

# Per land cover class: total number of samples ('size' summed over sites) and
# number of sites with that class (count of the (class, site) rows).
lc_summary = all_samples.groupby(['Land Cover', 'Site'], as_index=False).size().groupby(['Land Cover']).agg({'size': 'sum', "Site": "count"})
lc_summary['landcover_group'] = ''
for group, classes in landcover_groups.items():
    # Translate the positions into class names and tag those rows with the group.
    lc = [land_cover[c] for c in classes]
    lc_summary.loc[lc, 'landcover_group'] = group
lc_summary = lc_summary.reset_index()
# Columns are renamed by position: index, 'size', 'Site', 'landcover_group'.
lc_summary.columns = ['Land Cover', '#Samples', '#Sites', 'Category']
# Last expression of the cell: displayed by the notebook.
lc_summary[['Category', 'Land Cover', '#Samples', '#Sites']].sort_values(['Category', 'Land Cover'])

### Create Elevation Groups

In [None]:
# Elevation band edges in metres. They are divided by `upper` before being
# compared with the `Elevation` column.
# NOTE(review): presumably the column holds elevation normalised by 6000 m - confirm.
upper = 6000
_band_edges_m = {
    '< 500m': (0, 500),
    '500-1000m': (500, 1000),
    '1000-1500m': (1000, 1500),
    '1500-2000m': (1500, 2000),
    '> 2000m': (2000, 6000),
}
elevation_groups = {
    name: [low / upper, high / upper] for name, (low, high) in _band_edges_m.items()
}


def _summarise_band(name, bounds):
    """Return the summary row (sample/site counts, modal classes) for one elevation band."""
    # `between` is inclusive at both ends, so a sample exactly on an edge
    # is counted in both neighbouring bands.
    in_band = all_samples[all_samples.Elevation.between(bounds[0], bounds[1])]
    site_rows = in_band[['Site', 'Land Cover', 'Czone3']].drop_duplicates()
    return {
        'Elevation': name,
        '#Samples': in_band.Elevation.size,
        '#Sites': site_rows.Site.size,
        'Land Cover': site_rows['Land Cover'].mode()[0],
        'Climate Zone': site_rows['Czone3'].mode()[0],
    }


elevation_summ = [_summarise_band(name, bounds) for name, bounds in elevation_groups.items()]
# Last expression of the cell: displayed by the notebook.
pd.DataFrame(elevation_summ)

## Analysis
### Analysis by Land Cover

In [None]:
def landcover_results(samples, predictions, scenario):
    """Summarise estimation accuracy for each land cover group.

    Parameters
    ----------
    samples : DataFrame with at least 'Land Cover' and 'LFMC value' columns.
    predictions : DataFrame with one column of estimates per model, row-aligned
        with ``samples``.
    scenario : unused; kept so existing callers keep working.

    Returns
    -------
    (bp_data, lc_results)
        ``bp_data`` is a long DataFrame with one row per (sample, model) and the
        columns 'Estimated LFMC', 'LFMC value', 'Error', 'Model', 'Land cover'.
        ``lc_results`` is indexed by land cover group and holds the sample
        count, mean/variance of measured LFMC, and the mean and standard
        deviation over models of Bias, RMSE, ubRMSE and R2.

    Groups with fewer than 10 samples are skipped. ``calc_statistics`` is
    assumed to return a dict with 'Bias', 'RMSE', 'ubRMSE' and 'R2' entries.
    """
    overall_mean = samples['LFMC value'].mean()

    lc_predicts = []
    lc_tests = []
    lc_samples = []
    lc_y = []
    for group, classes in landcover_groups.items():
        lc = [land_cover[c] for c in classes]
        in_group = samples['Land Cover'].isin(lc)
        y = samples[in_group]['LFMC value']
        z = predictions[in_group].reindex(y.index)
        if z.shape[0] >= 10:
            lc_y.append(y)
            lc_predicts.append(z)
            lc_tests.append(group)
            lc_samples.append(y.count())

    lc_results = pd.DataFrame(lc_samples, columns=['Samples'], index=lc_tests).rename_axis(['Land Cover'])

    # One statistics frame per group, with one row per model.
    lc_stats = []
    for i, pred_ in enumerate(lc_predicts):
        e = {}
        for model_ in pred_.columns:
            e[model_] = calc_statistics(lc_y[i], pred_[model_], ybar=overall_mean)
            e[model_]['Samples'] = lc_samples[i]
        lc_stats.append(pd.DataFrame.from_dict(e, orient='index'))

    # Mean and standard deviation of each statistic across the models.
    edf = pd.DataFrame.from_dict({lc_tests[n]: e.mean() for n, e in enumerate(lc_stats)}, orient='index').convert_dtypes()
    sdf = pd.DataFrame.from_dict({lc_tests[n]: e.std() for n, e in enumerate(lc_stats)}, orient='index').convert_dtypes()
    lc_results['Mean LFMC'] = [y.mean() for y in lc_y]
    lc_results['Variance'] = [y.var() for y in lc_y]
    for metric in ('Bias', 'RMSE', 'ubRMSE', 'R2'):
        lc_results[metric] = list(edf[metric])
    for metric in ('Bias', 'RMSE', 'ubRMSE', 'R2'):
        lc_results[f'{metric}_std'] = list(sdf[metric])

    # Long-format data for the box plots. Every entry of lc_results.index is a
    # key of landcover_groups, so no extra filtering is needed here.
    bp_data = []
    for n, t in enumerate(lc_results.index):
        frames = []
        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        for model_, estimates in lc_predicts[n].items():
            frame = pd.concat([estimates, lc_y[n], estimates - lc_y[n]],
                              keys=['Estimated LFMC', 'LFMC value', 'Error'], axis=1)
            frame['Model'] = model_
            frames.append(frame)
        d = pd.concat(frames)
        d['Land cover'] = t
        bp_data.append(d)
    bp_data = pd.concat(bp_data)

    return bp_data, lc_results


In [None]:
# Within-site models are scored on their own samples, out-of-site models on all samples.
lc1_plot_data, lc1_results = landcover_results(samples1, predict1, SCENARIOS[0])
lc2_plot_data, lc2_results = landcover_results(all_samples, predict2, SCENARIOS[1])

# Banner: the heading framed by rules of matching width.
heading = 'Results by Landcover'
rule = '=' * len(heading)
print(rule, heading, rule, sep='\n')
display_frames([lc1_results, lc2_results], DISPLAY_NAMES, precision=2)


### Analysis by Land Cover and Elevation

In [None]:
def landcover_elevation_results(samples, predictions):
    """Summarise estimation accuracy for each (land cover group, elevation band).

    Parameters
    ----------
    samples : DataFrame with at least 'Land Cover', 'Elevation' and
        'LFMC value' columns.
    predictions : DataFrame with one column of estimates per model, row-aligned
        with ``samples``.

    Returns
    -------
    (bp_data, lc_results)
        ``bp_data`` is a long DataFrame (columns 'Estimated LFMC',
        'LFMC value', 'Error', 'Land cover', 'Elevation'), restricted to the
        land cover groups between the first and last entries of
        ``landcover_groups``. ``lc_results`` is indexed by
        (Land Cover, Elevation) and holds the sample count, mean/variance of
        measured LFMC, and the mean and standard deviation over models of
        Bias, RMSE, ubRMSE and R2.

    Combinations with fewer than 10 samples are skipped. Elevation bands are
    matched with ``between``, which is inclusive at both ends.
    ``calc_statistics`` is assumed to return a dict with 'Bias', 'RMSE',
    'ubRMSE' and 'R2' entries.
    """
    overall_mean = samples['LFMC value'].mean()

    lc_y = []
    lc_predicts = []
    lc_tests = []
    lc_samples = []
    for group, classes in landcover_groups.items():
        lc = [land_cover[c] for c in classes]
        in_group = samples['Land Cover'].isin(lc)
        for label, heights in elevation_groups.items():
            ind = in_group & (samples.Elevation.between(heights[0], heights[1]))
            y = samples[ind]['LFMC value']
            z = predictions[ind].reindex(y.index)
            if z.shape[0] >= 10:
                lc_y.append(y)
                lc_predicts.append(z)
                lc_tests.append([group, label])
                lc_samples.append(y.count())

    lc_results = pd.DataFrame(lc_samples, columns=['Samples'],
                              index=[[t[0] for t in lc_tests], [t[1] for t in lc_tests]])
    lc_results = lc_results.rename_axis(['Land Cover', 'Elevation'])

    # One statistics frame per combination, with one row per model.
    lc_stats = []
    for i, pred_ in enumerate(lc_predicts):
        e = {}
        for model_ in pred_.columns:
            e[model_] = calc_statistics(lc_y[i], pred_[model_], ybar=overall_mean)
            e[model_]['Samples'] = lc_samples[i]
        lc_stats.append(pd.DataFrame.from_dict(e, orient='index'))

    # Mean and standard deviation of each statistic across the models.
    edf = pd.DataFrame.from_dict({';'.join(lc_tests[n]): e.mean() for n, e in enumerate(lc_stats)}, orient='index').convert_dtypes()
    sdf = pd.DataFrame.from_dict({';'.join(lc_tests[n]): e.std() for n, e in enumerate(lc_stats)}, orient='index').convert_dtypes()
    lc_results['Mean LFMC'] = [y.mean() for y in lc_y]
    lc_results['Variance'] = [y.var() for y in lc_y]
    for metric in ('Bias', 'RMSE', 'ubRMSE', 'R2'):
        lc_results[metric] = list(edf[metric])
    for metric in ('Bias', 'RMSE', 'ubRMSE', 'R2'):
        lc_results[f'{metric}_std'] = list(sdf[metric])

    # Long-format data for the box plots: only the groups between the first and
    # last entries of landcover_groups are plotted. Every elevation label in
    # lc_tests comes from elevation_groups, so it needs no filtering.
    vars_ = list(landcover_groups.keys())[1:-1]
    bp_data = []
    for n, t in enumerate(lc_tests):
        if t[0] in vars_:
            # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
            d = [pd.concat([estimates, lc_y[n], estimates - lc_y[n]],
                           keys=['Estimated LFMC', 'LFMC value', 'Error'], axis=1)
                 for _, estimates in lc_predicts[n].items()]
            d = pd.concat(d)
            d['Land cover'] = t[0]
            d['Elevation'] = t[1]
            bp_data.append(d)
    bp_data = pd.concat(bp_data)

    return bp_data, lc_results

In [None]:
# Within-site models are scored on their own samples, out-of-site models on all samples.
lc3_plot_data, lc3_results = landcover_elevation_results(samples1, predict1)
lc4_plot_data, lc4_results = landcover_elevation_results(all_samples, predict2)

# Banner: the heading framed by rules of matching width.
heading = 'Results by Landcover and Elevation'
rule = '=' * len(heading)
print(rule, heading, rule, sep='\n')

display_frames([lc3_results, lc4_results], DISPLAY_NAMES, precision=2)

#### Elevation distributions
For each land cover group, show the percentage of its samples that fall in each elevation band.

In [None]:
def _elevation_share(results):
    """Return, per land cover group (columns), the rounded % of its samples in each elevation band (rows)."""
    band_counts = results.Samples.unstack()  # rows: land cover, columns: elevation band
    share = (band_counts.T / band_counts.sum(axis=1) * 100).round()
    # Order the rows like elevation_groups. A list is required here because
    # pandas 2.x rejects a dict passed as an indexer.
    return share.loc[list(elevation_groups)]


dist1 = _elevation_share(lc3_results)
dist2 = _elevation_share(lc4_results)
display_frames([dist1, dist2], DISPLAY_NAMES, precision=0)

## Generate Figures
### Landcover Figure

In [None]:
def samples_plot_lc(data, summ, ax, label, xlim):
    """Draw the measured-LFMC bar chart (mean with sd bars) per land cover group.

    Parameters
    ----------
    data : long-format plot data with 'Land cover' and 'LFMC value' columns.
    summ : Series of sample counts indexed by land cover group.
    ax : matplotlib axes to draw on.
    label : panel letter used in the x-axis label.
    xlim : (min, max) x-axis limits; ticks are placed every 50 units.

    Returns the axes that was drawn on.
    """
    data = data.reset_index()
    data = data[data['Land cover'].isin(lc_labels)]
    # NOTE(review): `ci='sd'` is deprecated in newer seaborn releases in favour
    # of `errorbar='sd'` - update once the minimum seaborn version is known.
    sns.barplot(y=data['Land cover'], x=data['LFMC value'], ci='sd',
                saturation=SATURATION, error_kw=dict(lw=LINE_WIDTH), ax=ax, zorder=2)
    for line in ax.lines:
        line.set_linewidth(LINE_WIDTH)
    ax.set_ylabel('Land cover')
    ax.set_xlabel(f'({label}) Measured LFMC', fontsize=FONT_SIZE)

    # Write the sample count of each group inside its bar.
    for ytick in ax.get_yticks():
        c = summ.loc[lc_labels[ytick]]
        ax.text(2, ytick, c, va='center', size=TEXT_SIZE, zorder=3)

    # Column header above the counts. The limits are read from `ax` itself
    # rather than from a global axes object.
    ylim = ax.get_ylim()
    ax.text(2, ylim[1], 'Samples', va='bottom')

    ax.set_xlim(xlim)
    xticks = range(xlim[0], xlim[1]+1, 50)
    ax.set_xticks(xticks)
    ax.set_xticklabels([f'{i}%' for i in xticks], fontsize=TEXT_SIZE)
    return ax

In [None]:
def plot_landcover(bp_data, lc_results, ax, xlim, xline, ylim, label):
    """Draw the estimation-error box plot per land cover group with RMSE and bias.

    Parameters
    ----------
    bp_data : long-format plot data with 'Land cover' and 'Error' columns.
    lc_results : per-group results table with an 'RMSE' column.
    ax : matplotlib axes to draw on.
    xlim : (min, max) x-axis limits.
    xline : x position of the dashed vertical reference line.
    ylim : (bottom, top) y extent used for the reference line and header text.
    label : panel letter used in the x-axis label.

    Returns the axes that was drawn on.
    """
    r1 = lc_results.reset_index()
    r1 = r1[r1['Land Cover'].isin(lc_labels)]
    r1 = r1.set_index(['Land Cover'])

    sns.boxplot(y='Land cover', x='Error', data=bp_data,
                saturation=SATURATION, width=0.8, linewidth=LINE_WIDTH, showfliers=False, showcaps=False,
                ax=ax, zorder=2, boxprops={'zorder': 2}, whiskerprops={'zorder': 2})

    bias = bp_data.groupby(['Land cover']).Error.mean().round(2)
    cycle_colours = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    # Tick positions are used as indices into lc_labels, so the boxes are
    # expected to appear in lc_labels order.
    for ytick in ax.get_yticks():
        group = lc_labels[ytick]
        rmse = r1.RMSE.loc[group]
        bias_ = bias.loc[group]

        # Add the bias text
        ax.text(xlim[0]+1, ytick, f'{bias_: .2f}', ha='left', va='bottom', color='k', size=TEXT_SIZE)

        # Add the RMSE marker, filled with the colour of the matching box
        ax.plot(rmse, ytick, 'dk', markersize=MARKER_SIZE, scalex=False, zorder=3,
                mfc=cycle_colours[ytick])

        # Add the RMSE text just to the right of the marker
        ax.text(round(rmse, 2) + 3, ytick, f'{rmse:.2f}', va='bottom', color='k', size=TEXT_SIZE)

    ax.plot([xline, xline], ylim, '--', color='grey', linewidth=LINE_WIDTH, zorder=1)
    ax.set_ylabel(None)
    ax.set_xlabel(f'({label}) Estimation Error Distribution,\nRMSE and Bias', fontsize=FONT_SIZE)
    ax.set_xlim(xlim)
    xticks = range(-50, 71, 25)
    ax.set_xticks(xticks)
    ax.set_xticklabels([f'{i}%' for i in xticks], fontsize=TEXT_SIZE)
    ax.text(xlim[0]+1, ylim[1], 'Bias (%)', va='bottom')
    return ax

In [None]:
def plot_landcover_r2(bp_data, lc_results, ax, xlim, xline, ylim, label):
    """Draw the estimated-LFMC box plot per land cover group with R2 on a twin x-axis.

    Parameters
    ----------
    bp_data : long-format plot data with 'Land cover' and 'Estimated LFMC' columns.
    lc_results : per-group results table with an 'R2' column.
    ax : matplotlib axes to draw on.
    xlim : (min, max) x-axis limits of the LFMC axis; ticks every 40 units.
    xline : R2 value of the dashed vertical reference line (drawn on the twin axis).
    ylim : (bottom, top) y extent used for the reference line.
    label : panel letter used in the x-axis label.

    Returns the original (LFMC) axes.
    """
    r1 = lc_results.reset_index()
    r1 = r1[r1['Land Cover'].isin(lc_labels)]
    r1 = r1.set_index(['Land Cover'])

    sns.boxplot(y='Land cover', x='Estimated LFMC', data=bp_data,
                saturation=SATURATION, width=0.8, linewidth=LINE_WIDTH, showfliers=False, showcaps=False,
                ax=ax, zorder=2, boxprops={'zorder': 2}, whiskerprops={'zorder': 2})
    ax.set_xlabel(f'({label}) LFMC Estimation Distribution and $R^2$', fontsize=FONT_SIZE)
    ax.set_xlim(xlim)
    xticks = range(xlim[0], xlim[1]+1, 40)
    ax.set_xticks(xticks)
    ax.set_xticklabels([f'{i}%' for i in xticks], fontsize=TEXT_SIZE)
    ax.set_ylabel(None)

    # Second x-axis (0..1) on top for the R2 markers. It has a local name so it
    # cannot be confused with the notebook-level `ax1`.
    r2_ax = ax.twiny()
    r2_ax.set_xlim([0, 1])
    plt.setp(r2_ax.get_xticklabels(), color="blue")
    r2_ax.plot([xline, xline], ylim, '--', color='grey', linewidth=LINE_WIDTH, zorder=1)

    cycle_colours = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    for ytick in ax.get_yticks():
        r2 = r1.R2.loc[lc_labels[ytick]]

        # Add the R2 marker, filled with the colour of the matching box
        r2_ax.plot(r2, ytick, '*k', markersize=9, scalex=False, mfc=cycle_colours[ytick])

        # Add the R2 text: left of the marker for low scores, right of it otherwise
        pos_ = round(r2, 2)
        ha = 'right' if r2 < 0.4 else 'left'
        pos_ = (pos_ - 0.04) if r2 < 0.4 else (pos_ + 0.03)
        r2_ax.text(pos_, ytick, f'{r2:.2f}', va='bottom', ha=ha, color='b', size=TEXT_SIZE)

    # The first tick label doubles as the axis name.
    r2_ax.set_xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
    r2_ax.set_xticklabels(['$R^2$', 0.2, 0.4, 0.6, 0.8, 1.0])
    return ax

In [None]:
# Build the 2x3 land cover figure: the top row shows the within-site models,
# the bottom row the out-of-site models. Columns: measured LFMC with sample
# counts, error distribution with RMSE/bias, estimate distribution with R2.
lc_labels = list(landcover_groups.keys())
sns.set_palette(sns.color_palette('terrain'))
plt.rcParams.update({'font.size': TEXT_SIZE})

# Axis limits for the top row.
# NOTE(review): xlim_r2 is assigned but not used by the visible plotting calls.
xlim_lfmc = (0, 150)
xlim_rmse = (-50, 55)
xlim_r2 = (-0.5, 1.05)
xlim_pred = (40, 210)

fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(
    2, 3, figsize=(7.364, 4), gridspec_kw={'width_ratios': [1, 2, 2]},
    sharey=True, sharex=False,
    constrained_layout=True, dpi=500, linewidth=2, edgecolor="black")
fig.set_constrained_layout_pads(hspace=0.1)

# Top row: within-site models. The dashed line in panel (a) marks the mean
# measured LFMC; the y-limits are restored after drawing it.
ax1 = samples_plot_lc(lc1_plot_data, lc1_results['Samples'], ax1, 'a', xlim_lfmc)
ylim = ax1.get_ylim()
xline = samples1['LFMC value'].mean()
ax1.plot([xline, xline], ylim, '--', color='grey', linewidth=LINE_WIDTH, zorder=1)
ax1.set_ylim(ylim)
ax2 = plot_landcover(lc1_plot_data, lc1_results, ax2, xlim_rmse, ylim=ylim, xline=WITHIN_SITE_RMSE, label='b')
ax2.set_title(f'{DISPLAY_NAMES[0]}', y=1.1, fontsize=TITLE_SIZE)
ax3 = plot_landcover_r2(lc1_plot_data, lc1_results, ax3, xlim_pred, ylim=ylim, xline=WITHIN_SITE_R2, label='c')

# Axis limits for the bottom row (only the error range differs).
xlim_lfmc = (0, 150)
xlim_rmse = (-75, 60)
xlim_r2 = (-0.5, 1.05)
xlim_pred = (40, 210)

# Bottom row: out-of-site models, evaluated on all samples.
ax4 = samples_plot_lc(lc2_plot_data, lc2_results['Samples'], ax4, 'd', xlim_lfmc)
ylim = ax4.get_ylim()
xline = all_samples['LFMC value'].mean()
ax4.plot([xline, xline], ylim, '--', color='grey', linewidth=LINE_WIDTH, zorder=1)
ax4.set_ylim(ylim)
ax5 = plot_landcover(lc2_plot_data, lc2_results, ax5, xlim_rmse, ylim=ylim, xline=OUT_OF_SITE_RMSE, label='e')
ax5.set_title(f'{DISPLAY_NAMES[1]}', y=1.1, fontsize=TITLE_SIZE)
ax6 = plot_landcover_r2(lc2_plot_data, lc2_results, ax6, xlim_pred, ylim=ylim, xline=OUT_OF_SITE_R2, label='f')

# Write the figure to the figures directory as a 500 dpi JPEG.
fig.savefig(os.path.join(output_dir, 'Land Cover.jpeg'), format="jpeg", bbox_inches='tight', pad_inches=0.1, dpi=500)

### Landcover and Elevation Figure

In [None]:
def samples_plot_el(data, summ, ax, label, xlim):
    """Draw the measured-LFMC bar chart per elevation band, split by land cover group.

    Parameters
    ----------
    data : long-format plot data with 'Elevation', 'Land cover' and
        'LFMC value' columns.
    summ : Series of sample counts indexed by (land cover group, elevation band).
    ax : matplotlib axes to draw on.
    label : panel letter used in the x-axis label; panel 'a' shifts the
        'Samples' header to the right.
    xlim : (min, max) x-axis limits; ticks are placed every 50 units.

    Returns the axes that was drawn on.
    """
    data = data.reset_index()
    data = data[data.Elevation.isin(labels) & data['Land cover'].isin(vars_)]
    # NOTE(review): `ci='sd'` is deprecated in newer seaborn releases in favour
    # of `errorbar='sd'` - update once the minimum seaborn version is known.
    sns.barplot(y=data['Elevation'], x=data['LFMC value'], hue=data['Land cover'], ci='sd',
                saturation=SATURATION, error_kw=dict(lw=LINE_WIDTH), ax=ax, zorder=2)
    for line in ax.lines:
        line.set_linewidth(LINE_WIDTH)
    ax.set_ylabel('Elevation')
    ax.set_xlabel(f'({label}) Measured LFMC', fontsize=FONT_SIZE)

    # Vertical offsets of the three hue bars within one elevation band.
    # These are hard-coded for exactly three land cover groups.
    hue_offsets = (-0.267, 0.0, .267)
    for ytick in ax.get_yticks():
        for lc, group in enumerate(vars_):
            c = summ.loc[group, labels[ytick]]
            ax.text(2, ytick + hue_offsets[lc], c, va='center', size=TEXT_SIZE, zorder=3)

    # Column header above the counts. The limits are read from `ax` itself
    # rather than from a global axes object.
    ylim = ax.get_ylim()
    xpos = 15 if label == 'a' else 2
    ax.text(xpos, ylim[1], 'Samples', va='bottom')

    ax.set_xlim(xlim)
    xticks = range(xlim[0], xlim[1]+1, 50)
    ax.set_xticks(xticks)
    ax.set_xticklabels([f'{i}%' for i in xticks], fontsize=TEXT_SIZE)
    return ax

In [None]:
def plot_landcover_elevation(bp_data, lc_results, ax, xlim, xline, ylim, label):
    """Draw the estimation-error box plot per elevation band and land cover group.

    Parameters
    ----------
    bp_data : long-format plot data with 'Elevation', 'Land cover' and 'Error' columns.
    lc_results : results table indexed by (Land Cover, Elevation) with an 'RMSE' column.
    ax : matplotlib axes to draw on.
    xlim : (min, max) x-axis limits.
    xline : x position of the dashed vertical reference line.
    ylim : (bottom, top) y extent used for the reference line and header text.
    label : panel letter used in the x-axis label.

    Returns ``(ax, legend_points)``; ``legend_points`` is always an empty list
    and is kept only for compatibility with existing callers.
    """
    r1 = lc_results.reset_index()
    r1 = r1[r1.Elevation.isin(labels) & r1['Land Cover'].isin(vars_)]
    r1 = r1.set_index(['Elevation', 'Land Cover'])

    sns.boxplot(y='Elevation', x='Error', hue='Land cover', data=bp_data, order=labels,
                saturation=SATURATION, width=0.8, linewidth=LINE_WIDTH, showfliers=False, showcaps=False,
                ax=ax, zorder=2, boxprops={'zorder': 2}, whiskerprops={'zorder': 2})

    bias = bp_data.groupby(['Elevation', 'Land cover']).Error.mean().round(2)
    cycle_colours = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    # Vertical offsets of the three hue boxes within one elevation band.
    # These are hard-coded for exactly three land cover groups.
    hue_offsets = (-0.267, 0.0, .267)
    legend_points = []
    for ytick in ax.get_yticks():
        band = labels[ytick]
        for lc, group in enumerate(vars_):
            y_off = ytick + hue_offsets[lc]
            rmse = r1.RMSE.loc[band, group]
            bias_ = bias.loc[band, group]

            # Add the bias text
            ax.text(xlim[0]+1, y_off, f'{bias_: .2f}', ha='left', va='bottom', color='k', size=TEXT_SIZE)

            # Add the RMSE marker, filled with the colour of the matching box
            ax.plot(rmse, y_off, 'dk', markersize=MARKER_SIZE, scalex=False, zorder=3,
                    mfc=cycle_colours[lc])

            # Add the RMSE text just to the right of the marker
            ax.text(round(rmse, 2) + 3, y_off, f'{rmse:.2f}', va='bottom', color='k', size=TEXT_SIZE)

    ax.plot([xline, xline], ylim, '--', color='grey', linewidth=LINE_WIDTH, zorder=1)
    ax.set_ylabel(None)
    ax.set_xlabel(f'({label}) Estimation Error Distribution,\nRMSE and Bias', fontsize=FONT_SIZE)
    ax.set_xlim(xlim)
    xticks = range(-50, 71, 25)
    ax.set_xticks(xticks)
    ax.set_xticklabels([f'{i}%' for i in xticks], fontsize=TEXT_SIZE)
    ax.text(xlim[0]+1, ylim[1], 'Bias (%)', va='bottom')
    return ax, legend_points

In [None]:
def plot_landcover_elevation_r2(bp_data, lc_results, ax, xlim, xline, ylim, label):
    """Draw the estimated-LFMC box plot per elevation band and land cover group, with R2.

    Parameters
    ----------
    bp_data : long-format plot data with 'Elevation', 'Land cover' and
        'Estimated LFMC' columns.
    lc_results : results table indexed by (Land Cover, Elevation) with an 'R2' column.
    ax : matplotlib axes to draw on.
    xlim : (min, max) x-axis limits of the LFMC axis; ticks run from 40 in steps of 40.
    xline : R2 value of the dashed vertical reference line (drawn on the twin axis).
    ylim : (bottom, top) y extent used for the reference line.
    label : panel letter used in the x-axis label.

    Returns ``(ax, legend_points)``; ``legend_points`` is always an empty list
    and is kept only for compatibility with existing callers.
    """
    r1 = lc_results.reset_index()
    r1 = r1[r1.Elevation.isin(labels) & r1['Land Cover'].isin(vars_)]
    r1 = r1.set_index(['Elevation', 'Land Cover'])

    sns.boxplot(y='Elevation', x='Estimated LFMC', hue='Land cover', data=bp_data, order=labels,
                saturation=SATURATION, width=0.8, linewidth=LINE_WIDTH, showfliers=False, showcaps=False,
                ax=ax, zorder=2, boxprops={'zorder': 2}, whiskerprops={'zorder': 2})
    ax.set_xlabel(f'({label}) LFMC Estimation Distribution and $R^2$', fontsize=FONT_SIZE)
    ax.set_xlim(xlim)
    xticks = range(40, xlim[1]+1, 40)
    ax.set_xticks(xticks)
    ax.set_xticklabels([f'{i}%' for i in xticks], fontsize=TEXT_SIZE)
    ax.set_ylabel(None)

    # Second x-axis (0..1) on top for the R2 markers. It has a local name so it
    # cannot be confused with the notebook-level `ax1`.
    r2_ax = ax.twiny()
    r2_ax.set_xlim([0, 1])
    plt.setp(r2_ax.get_xticklabels(), color="blue")
    r2_ax.plot([xline, xline], ylim, '--', color='grey', linewidth=LINE_WIDTH, zorder=1)

    cycle_colours = plt.rcParams["axes.prop_cycle"].by_key()["color"]
    # Vertical offsets of the three hue boxes within one elevation band.
    # These are hard-coded for exactly three land cover groups.
    hue_offsets = (-0.267, 0.0, .267)
    legend_points = []
    for ytick in ax.get_yticks():
        band = labels[ytick]
        for lc, group in enumerate(vars_):
            y_off = ytick + hue_offsets[lc]
            r2 = r1.R2.loc[band, group]

            # Add the R2 marker, filled with the colour of the matching box
            r2_ax.plot(r2, y_off, '*k', markersize=9, scalex=False, mfc=cycle_colours[lc])

            # Add the R2 text: left of the marker for low scores, right of it otherwise
            pos_ = round(r2, 2)
            ha = 'right' if r2 < 0.4 else 'left'
            pos_ = (pos_ - 0.04) if r2 < 0.4 else (pos_ + 0.03)
            r2_ax.text(pos_, y_off, f'{r2:.2f}', va='bottom', ha=ha, color='b', size=TEXT_SIZE)

    # The first tick label doubles as the axis name.
    r2_ax.set_xticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
    r2_ax.set_xticklabels(['$R^2$', 0.2, 0.4, 0.6, 0.8, 1.0])
    return ax, legend_points

In [None]:
# Build the 2x3 land cover / elevation figure: the top row shows the
# within-site models, the bottom row the out-of-site models. Columns: measured
# LFMC with sample counts, error distribution with RMSE/bias, estimate
# distribution with R2.
labels = list(elevation_groups.keys())
# Only the land cover groups between the first and last entries are plotted.
vars_ = list(landcover_groups.keys())[1:-1]

# Axis limits for the top row.
# NOTE(review): xlim_r2 is assigned but not used by the visible plotting calls.
xlim_lfmc = (0, 190)
xlim_rmse = (-70, 70)
xlim_r2 = (-0.5, 1.05)
xlim_pred = (35, 250)

plt.rcParams.update({'font.size': TEXT_SIZE})
# Skip the first 'terrain' colour so the plotted groups keep the colours they
# have in the land cover figure.
sns.set_palette(sns.color_palette('terrain')[1:])
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(
    2, 3, figsize=(7.364, 8), gridspec_kw={'width_ratios': [1, 2, 2]},
    sharey=True, sharex=False,
    constrained_layout=True, dpi=500, linewidth=2, edgecolor="black")
fig.set_constrained_layout_pads(hspace=0.05)

# Top row: within-site models. The dashed line in panel (a) marks the mean
# measured LFMC; the y-limits are restored after drawing it.
ax1 = samples_plot_el(lc3_plot_data, lc3_results['Samples'], ax=ax1, label='a', xlim=xlim_lfmc)
ylim = ax1.get_ylim()
xline = samples1['LFMC value'].mean()
ax1.plot([xline, xline], ylim, '--', color='grey', linewidth=LINE_WIDTH, zorder=1)
ax1.set_ylim(ylim)

ax2, legend_points1 = plot_landcover_elevation(lc3_plot_data, lc3_results, ax2, xlim_rmse, ylim=ylim, xline=WITHIN_SITE_RMSE, label='b')
ax2.set_title(f'{DISPLAY_NAMES[0]}', y=1.05, fontsize=TITLE_SIZE)

# NOTE(review): bp_data3 is not used by any visible cell - confirm and remove.
bp_data3 = lc3_plot_data.reset_index().merge(predict1.stack().rename('Estimated LFMC').reset_index())
ax3, legend_points2 = plot_landcover_elevation_r2(lc3_plot_data, lc3_results, ax3, xlim_pred, ylim=ylim, xline=WITHIN_SITE_R2, label='c')

# Axis limits for the bottom row.
xlim_lfmc = (0, 175)
xlim_rmse = (-70, 70)
xlim_r2 = (-0.5, 1.05)
xlim_pred = (35, 230)

# Bottom row: out-of-site models, evaluated on all samples.
ax4 = samples_plot_el(lc4_plot_data, lc4_results['Samples'], ax=ax4, label='d', xlim=xlim_lfmc)
ylim = ax4.get_ylim()
xline = all_samples['LFMC value'].mean()
ax4.plot([xline, xline], ylim, '--', color='grey', linewidth=LINE_WIDTH, zorder=1)

ax5, legend_points3 = plot_landcover_elevation(lc4_plot_data, lc4_results, ax5, xlim_rmse, ylim=ylim, xline=OUT_OF_SITE_RMSE, label='e')
ax5.set_title(f'{DISPLAY_NAMES[1]}', y=1.05, fontsize=TITLE_SIZE)

# NOTE(review): bp_data4 is not used by any visible cell - confirm and remove.
bp_data4 = lc4_plot_data.reset_index().merge(predict2.stack().rename('Estimated LFMC').reset_index())
ax6, legend_points4 = plot_landcover_elevation_r2(lc4_plot_data, lc4_results, ax6, xlim_pred, ylim=ylim, xline=OUT_OF_SITE_R2, label='f')

# Break range labels such as '500-1000m' over two lines to save width.
ax1.set_yticklabels([l.replace('-', ' -\n') for l in labels])
# The matplotlib attribute is `labelpad`; the previous `label_pad` assignment
# only created an unused attribute and left the padding unchanged.
ax1.yaxis.labelpad = 0.0
# Keep a single legend, placed above the first panel.
ax1.legend(loc='center', bbox_to_anchor=(-0.26, 1.07), labelspacing=0.3, borderpad=0.3, handletextpad=0.3)
for panel in (ax2, ax3, ax4, ax5, ax6):
    panel.get_legend().remove()

# Write the figure to the figures directory as a 500 dpi JPEG.
fig.savefig(os.path.join(output_dir, 'Land Cover Elevation.jpeg'), format="jpeg", bbox_inches='tight', pad_inches=0.1, dpi=500)

### Fire Danger Threshold Figure

In [None]:
def threshold_results(y, yhat, threshold):
    """Summarise how measured and estimated values fall either side of a threshold.

    For every column of ``yhat`` the samples are split into four categories,
    each expressed as a percentage of the non-missing values in ``y``:

    * 'High fire risk'   - measured and estimated both <= threshold
    * 'Warning failures' - measured <= threshold, estimated > threshold
    * 'False alarms'     - measured > threshold, estimated <= threshold
    * 'Low fire risk'    - measured and estimated both > threshold

    Returns a DataFrame with rows 'mean' and 'std dev' (taken across the
    columns of ``yhat``) and one column per category.
    """
    category_names = ['High fire risk', 'Warning failures', 'False alarms', 'Low fire risk']
    one_percent = y.count() / 100
    measured_low = y.le(threshold)
    measured_high = y.gt(threshold)

    rows = []
    for member in yhat.columns:
        estimated_low = yhat[member].le(threshold)
        estimated_high = yhat[member].gt(threshold)
        rows.append([
            (measured_low & estimated_low).sum() / one_percent,
            (measured_low & estimated_high).sum() / one_percent,
            (measured_high & estimated_low).sum() / one_percent,
            (measured_high & estimated_high).sum() / one_percent,
        ])
    per_member = pd.DataFrame(rows, columns=category_names)

    summary = {name: [shares.mean(), shares.std()] for name, shares in per_member.items()}
    return pd.DataFrame(summary, index=['mean', 'std dev'])

In [None]:
def threshold_summary(samples1, predict1, samples2, predict2, threshold, ybar):
    """Build and display the threshold summaries for both scenarios.

    Parameters
    ----------
    samples1, samples2 : measured LFMC Series for the first and second scenario.
    predict1, predict2 : matching DataFrames with one column of estimates per model.
    threshold : LFMC value separating high from low fire risk.
    ybar : pair of values passed as the third argument of ``calc_statistics``,
        one per scenario.

    Returns a list of three DataFrames: the ``threshold_results`` table of each
    scenario, then the mean/std of RMSE, Bias and R2 computed only on samples
    whose measured value is at or below the threshold. The three tables are
    also displayed, transposed.
    """
    results = [threshold_results(samples1, predict1, threshold),
               threshold_results(samples2, predict2, threshold)]

    def _stats_below_threshold(measured, estimates, reference):
        # Score every model on the samples measured at or below the threshold.
        below = measured.le(threshold)
        return [calc_statistics(measured[below], member[below], reference)
                for _, member in estimates.items()]

    r1 = _stats_below_threshold(samples1, predict1, ybar[0])
    r2 = _stats_below_threshold(samples2, predict2, ybar[1])

    combined = pd.concat([pd.DataFrame(r1), pd.DataFrame(r2)], axis=1, keys=DISPLAY_NAMES)
    means = combined.mean().unstack()
    stds = combined.std().unstack()
    rmse120 = pd.DataFrame(
        [means.RMSE, stds.RMSE, means.Bias, stds.Bias, means.R2, stds.R2],
        index=['RMSE mean', 'RMSE std', 'Bias mean', 'Bias std', 'R2 mean', 'R2 std'],
    ).round(2)
    results.append(rmse120)

    display_frames([r.T for r in results], DISPLAY_NAMES + [f'LFMC < {threshold}'])
    return results

In [None]:
# Mean measured LFMC of each scenario, handed on to calc_statistics.
ybar = (samples1['LFMC value'].mean(), all_samples['LFMC value'].mean())
# counts[scenario][land cover] = [#samples <= threshold, #samples > threshold, threshold]
counts = [{}, {}]

# Forest: fire danger threshold and per-scenario plot rows.
threshold = 105
_ws_rows = lc1_plot_data[lc1_plot_data['Land cover'] == 'Forest']
_oos_rows = lc2_plot_data[lc2_plot_data['Land cover'] == 'Forest']

# Measured values once per sample ID; estimates pivoted to one column per model.
data1 = _ws_rows['LFMC value'].reset_index().drop_duplicates().set_index(['ID'])['LFMC value']
data2 = _ws_rows[['Estimated LFMC', 'Model']].pivot(columns=['Model'])
data3 = _oos_rows['LFMC value'].reset_index().drop_duplicates().set_index(['ID'])['LFMC value']
data4 = _oos_rows[['Estimated LFMC', 'Model']].pivot(columns=['Model'])

for scenario_counts, measured in zip(counts, (data1, data3)):
    scenario_counts['Forest'] = [(measured <= threshold).sum(), (measured > threshold).sum(), threshold]
forest = threshold_summary(data1, data2, data3, data4, threshold, ybar)

In [None]:
# Grassland: fire danger threshold and per-scenario plot rows.
threshold = 67
_ws_rows = lc1_plot_data[lc1_plot_data['Land cover'] == 'Grassland']
_oos_rows = lc2_plot_data[lc2_plot_data['Land cover'] == 'Grassland']

# Measured values once per sample ID; estimates pivoted to one column per model.
data1 = _ws_rows['LFMC value'].reset_index().drop_duplicates().set_index(['ID'])['LFMC value']
data2 = _ws_rows[['Estimated LFMC', 'Model']].pivot(columns=['Model'])
data3 = _oos_rows['LFMC value'].reset_index().drop_duplicates().set_index(['ID'])['LFMC value']
data4 = _oos_rows[['Estimated LFMC', 'Model']].pivot(columns=['Model'])

for scenario_counts, measured in zip(counts, (data1, data3)):
    scenario_counts['Grassland'] = [(measured <= threshold).sum(), (measured > threshold).sum(), threshold]
grassland = threshold_summary(data1, data2, data3, data4, threshold, ybar)

In [None]:
# Shrubland: fire danger threshold and per-scenario plot rows.
threshold = 121
_ws_rows = lc1_plot_data[lc1_plot_data['Land cover'] == 'Shrubland']
_oos_rows = lc2_plot_data[lc2_plot_data['Land cover'] == 'Shrubland']

# Measured values once per sample ID; estimates pivoted to one column per model.
data1 = _ws_rows['LFMC value'].reset_index().drop_duplicates().set_index(['ID'])['LFMC value']
data2 = _ws_rows[['Estimated LFMC', 'Model']].pivot(columns=['Model'])
data3 = _oos_rows['LFMC value'].reset_index().drop_duplicates().set_index(['ID'])['LFMC value']
data4 = _oos_rows[['Estimated LFMC', 'Model']].pivot(columns=['Model'])

for scenario_counts, measured in zip(counts, (data1, data3)):
    scenario_counts['Shrubland'] = [(measured <= threshold).sum(), (measured > threshold).sum(), threshold]
shrubland = threshold_summary(data1, data2, data3, data4, threshold, ybar)

In [None]:
def fmt_label(mean_, std_):
    """Format a percentage and its spread as '<mean>%' + mathtext plus-minus + '<std>', one decimal each."""
    # Raw f-string: the backslash of the mathtext command must reach matplotlib
    # unchanged, and a non-raw literal would contain an invalid escape sequence.
    return rf'{mean_:.1f}%$\pm${std_:.1f}'


def make_autopct(df):
    """Return an ``autopct`` callback that labels a pie wedge with mean and std dev.

    ``df`` must have 'mean' and 'std dev' columns. The callback finds the row
    whose mean equals the wedge percentage after rounding both to two decimals
    and formats that row's standard deviation next to the percentage. If no
    row matches, an IndexError is raised.
    """
    def my_autopct(pct):
        std = df[np.round(df['mean'], 2) == np.round(pct, 2)]['std dev']
        # Positional access: `std` is indexed by category label, not by integer.
        return fmt_label(pct, std.iloc[0])
    return my_autopct

In [None]:
bar_order = ['High fire risk', 'Warning failures', 'False alarms', 'Low fire risk']
lc_keys = ['Forest', 'Grassland', 'Shrubland']
temp = {'Forest': forest, 'Grassland': grassland, 'Shrubland': shrubland}
# plot_data[scenario name][land cover] =
#     [threshold, transposed threshold table of that scenario, its RMSE/Bias/R2 summary column]
plot_data = {
    display_name: {
        k: [counts[position][k][2], temp[k][position].T, temp[k][2][display_name]]
        for k in lc_keys
    }
    for position, display_name in enumerate(DISPLAY_NAMES)
}

In [None]:
# Build the 2x3 fire danger threshold figure: one pie per land cover group,
# within-site models on the top row and out-of-site models on the bottom row,
# each with a small table of RMSE / Bias / R2 underneath.
palette = sns.color_palette('cubehelix', 5)[1:]
colours = [palette[1], palette[2], palette[0], palette[3]]
sns.set_palette(colours)
lc_keys = ['Forest', 'Grassland', 'Shrubland']
pie_order = ['High fire risk', 'Warning failures', 'Low fire risk', 'False alarms']
pie_labels = ['Correct high fire\nrisk estimations', 'Missed high fire\nrisk estimations',
              'Correct low fire\nrisk estimations', 'False high fire\nrisk estimations']


def _stats_table_rows(stats):
    """Format the RMSE, Bias and R2 mean/std entries of one land cover as table rows."""
    # Raw f-strings keep the mathtext backslash without an invalid escape sequence.
    return [['RMSE', rf"{stats['RMSE mean']:.2f}% $\pm${stats['RMSE std']:.2f}"],
            ['Bias', rf"{stats['Bias mean']:.2f}% $\pm${stats['Bias std']:.2f}"],
            ['$R^2$', rf"{stats['R2 mean']:.2f} $\pm${stats['R2 std']:.2f}"]]


fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(
    2, 3, figsize=(5.397, 4.75), sharey=False, sharex=False,
    constrained_layout=True, dpi=500, linewidth=2, edgecolor="black")

# Top row: within-site models.
df1 = pd.concat([forest[0].T['mean'], grassland[0].T['mean'], shrubland[0].T['mean']], keys=lc_keys).unstack()
df1a = pd.concat([forest[2][DISPLAY_NAMES[0]], grassland[2][DISPLAY_NAMES[0]], shrubland[2][DISPLAY_NAMES[0]]],
                 axis=1, keys=lc_keys)
for idx, ax in enumerate([ax1, ax2, ax3]):
    lc_key = lc_keys[idx]
    sa = 15 if idx == 2 else 0  # the last pie is rotated slightly
    df = plot_data[DISPLAY_NAMES[0]][lc_key][1]
    df1[pie_order].loc[lc_key].plot.pie(
        ax=ax, colors=colours, autopct=make_autopct(df),
        labels=['', '', '', ''], startangle=sa, pctdistance=0.5, radius=1)
    if idx == 1:
        # The row title sits over the middle pie.
        ax.set_title(f"{DISPLAY_NAMES[0]}", fontsize=TITLE_SIZE)
    ax.set_ylabel('')
    ax.set_xlabel(lc_key, fontsize=FONT_SIZE)
    ax.xaxis.set_label_position('top')
    ax.set_xlim((-1.0, 1.0))
    ax.set_ylim((-1.1, 1.0))
    tab_ = ax.table(cellText=_stats_table_rows(df1a[lc_key]), loc='bottom',
                    fontsize=TEXT_SIZE, colWidths=[0.27, 0.65])
    tab_.auto_set_font_size(False)

# Bottom row: out-of-site models.
df2 = pd.concat([forest[1].T['mean'], grassland[1].T['mean'], shrubland[1].T['mean']], keys=lc_keys).unstack()
df2a = pd.concat([forest[2][DISPLAY_NAMES[1]], grassland[2][DISPLAY_NAMES[1]], shrubland[2][DISPLAY_NAMES[1]]],
                 axis=1, keys=lc_keys)
for idx, ax in enumerate([ax4, ax5, ax6]):
    lc_key = lc_keys[idx]
    sa = 20 if idx == 2 else 0  # the last pie is rotated slightly
    df = plot_data[DISPLAY_NAMES[1]][lc_key][1]
    df2[pie_order].loc[lc_key].plot.pie(
        ax=ax, colors=colours, autopct=make_autopct(df),
        labels=['', '', '', ''], startangle=sa, pctdistance=0.5, radius=1)
    if idx == 1:
        ax.set_title(f"{DISPLAY_NAMES[1]}", fontsize=TITLE_SIZE)
        # Nudge the first wedge's label (pie_order[0]) of the middle pie up and to the right.
        text_str = fmt_label(*list(df.loc[pie_order[0]]))
        for child in ax.get_children():
            if isinstance(child, plt.Text) and child.get_text() == text_str:
                x_pos, y_pos = child.get_position()
                child.set_y(y_pos + 0.04)
                child.set_x(x_pos + 0.05)
                break
    ax.set_ylabel('')
    ax.set_xlabel(lc_key, fontsize=FONT_SIZE)
    ax.xaxis.set_label_position('top')
    ax.set_xlim((-1.0, 1.0))
    ax.set_ylim((-1.1, 1.0))
    tab_ = ax.table(cellText=_stats_table_rows(df2a[lc_key]), loc='bottom',
                    fontsize=TEXT_SIZE, colWidths=[0.27, 0.65])
    tab_.auto_set_font_size(False)

# One shared legend, plus a text box listing the threshold used for each land cover.
ax1.legend(labels=pie_labels, bbox_to_anchor=(-0.10, 1.0))
ax4.text(-1.3, 1.2, "Fire danger thresholds\n" + "\n".join([f"{lc}: {th[2]:4d}%" for lc, th in counts[0].items()]),
         va='top', ha='right', bbox=dict(boxstyle='round,pad=0.3', facecolor='none', edgecolor='lightgrey'))

# Write the figure to the figures directory as a 500 dpi JPEG.
fig.savefig(os.path.join(output_dir, 'LFMC thresholds.jpeg'), format="jpeg", bbox_inches='tight', pad_inches=0.1, dpi=500)