In [None]:
import sys
sys.path.append('..')
from config_figures import *

In [None]:
# Load the per-species validation statistics, drop the '.geo' column, expose
# 'system:index' as 'species', and order rows from most to fewest observations.
# `nobs_cat` labels each row by whether `nobs` reaches 90.
df = (
    pd.read_csv(validation_stats_file)
    .drop(columns=['.geo'])
    .rename(columns={'system:index': 'species'})
    .sort_values('nobs', ascending=False)
    .assign(
        nobs_cat=lambda stats: stats['nobs'].ge(90).map(
            {True: 'at least 90', False: 'less than 90'}
        )
    )
)
df

In [None]:
# Reshape to long form: one row per (species, metric), with the metric name in
# 'metric' and its score in the default 'value' column.
df_melt = df.melt(
    id_vars=['species', 'nobs', 'nobs_cat'],
    value_vars=['tss', 'precision', 'recall'],
    var_name='metric',
)
df_melt

In [None]:
# Scatter every validation metric against the number of occurrences, one panel
# per metric, coloured by the `nobs_cat` class (legend suppressed).
#
# Titles are keyed by metric name and the panel order is pinned with
# `col_order`, so a title cannot land on the wrong panel if the row order of
# `df_melt` ever changes (the previous code indexed axes by position).
metric_titles = {
    'tss': 'True skill statistic (TSS)',
    'precision': 'Precision',
    'recall': 'Recall',
}
g = sns.relplot(
    df_melt,
    x='nobs',
    y='value',
    hue='nobs_cat',
    col='metric',
    col_order=list(metric_titles),
    s=3,
    alpha=0.5,
    edgecolor="none",
    height=4,
    legend=False,
)
g.set(xlabel='Number of occurrences')
for panel_metric, panel_ax in g.axes_dict.items():
    panel_ax.set_title(metric_titles[panel_metric])
# Save through the grid's own figure instead of pyplot's implicit current figure.
g.figure.savefig(figuredir + 'nobs_tss_precision_recall.png')

In [None]:
# Histogram of each validation metric, restricted to rows with nobs >= 90
# (the same threshold that defines the 'at least 90' class in `nobs_cat`).
#
# X-axis labels are keyed by metric name and the panel order is pinned with
# `col_order`, so labels stay attached to the right panel regardless of the
# row order of `df_melt` (the previous code indexed axes by position).
metric_labels = {
    'tss': 'True skill statistic',
    'precision': 'Precision',
    'recall': 'Recall',
}
g = sns.FacetGrid(
    df_melt[df_melt['nobs'] >= 90],
    col="metric",
    col_order=list(metric_labels),
    sharex=False,
)
g.map(sns.histplot, 'value', bins=25, edgecolor=None)
# Blank the default "metric = ..." titles; the x-axis label names each panel.
g.set_titles(col_template="")
for panel_metric, panel_ax in g.axes_dict.items():
    panel_ax.set_xlabel(metric_labels[panel_metric])
# Save through the grid's own figure instead of pyplot's implicit current figure.
g.figure.savefig(figuredir + 'tss_precision_recall.png')

In [None]:
# Read the table at `sdm_splot_file`, attach each species' `nobs` from `df`
# (inner join on 'species'), and derive scores from the tp / fp / tn / fn
# columns (presumably confusion-matrix counts — confirm against the export):
#   tss       = tp / (tp + fn) + tn / (tn + fp) - 1
#   precision = tp / (tp + fp)
#   recall    = tp / (tp + fn)
#   f1        = 2 * precision * recall / (precision + recall)
df_splot = (
    pd.read_csv(sdm_splot_file)
    .drop(columns=['system:index', '.geo'])
    .merge(df[['species', 'nobs']], on='species')
    .assign(
        tss=lambda t: (t['tp'] / (t['tp'] + t['fn'])) + (t['tn'] / (t['tn'] + t['fp'])) - 1,
        precision=lambda t: t['tp'] / (t['tp'] + t['fp']),
        recall=lambda t: t['tp'] / (t['tp'] + t['fn']),
        f1=lambda t: (2 * t['precision'] * t['recall']) / (t['precision'] + t['recall']),
    )
)
df_splot

In [None]:
# Keep only rows whose fn + tp total is strictly greater than 5.
# NOTE(review): `> 5` means at least 6, while the name says "5pos" — confirm
# whether `>= 5` was intended.
df_splot_5pos = df_splot.loc[
    lambda t: t[['fn', 'tp']].sum(axis=1).gt(5)
]
df_splot_5pos

In [None]:
# Mean and standard deviation of TSS over the filtered rows, printed side by side.
print(*df_splot_5pos['tss'].agg(['mean', 'std']))

In [None]:
# Mean of the per-row false positive rate, fp / (fp + tn), over the filtered rows.
df_splot_5pos['fp'].div(df_splot_5pos['fp'].add(df_splot_5pos['tn'])).mean()

In [None]:
# Mean of the per-row false negative rate, fn / (fn + tp), over the filtered rows.
df_splot_5pos['fn'].div(df_splot_5pos['fn'].add(df_splot_5pos['tp'])).mean()

In [None]:
# Long form of the filtered table: one row per (species, metric), with the
# metric name in 'metric' and its score in the default 'value' column.
df_splot_melt = df_splot_5pos.melt(
    id_vars=['species', 'nobs'],
    value_vars=['tss', 'precision', 'recall'],
    var_name='metric',
)
df_splot_melt

In [None]:
# Histogram of each metric in `df_splot_melt`, one panel per metric with
# independent x and y axes.
#
# X-axis labels are keyed by metric name and the panel order is pinned with
# `col_order`, so labels stay attached to the right panel regardless of the
# row order of `df_splot_melt` (the previous code indexed axes by position).
metric_labels = {
    'tss': 'True skill statistic',
    'precision': 'Precision',
    'recall': 'Recall',
}
g = sns.FacetGrid(
    df_splot_melt,
    col="metric",
    col_order=list(metric_labels),
    sharex=False,
    sharey=False,
)
g.map(sns.histplot, 'value', bins=25, edgecolor=None)
# Blank the default "metric = ..." titles; the x-axis label names each panel.
g.set_titles(col_template="")
for panel_metric, panel_ax in g.axes_dict.items():
    panel_ax.set_xlabel(metric_labels[panel_metric])
# Save through the grid's own figure instead of pyplot's implicit current figure.
g.figure.savefig(figuredir + 'splot_tss_precision_recall.png')

In [None]:
# Read the table at `sdm_mhs_iou_file`, discard rows whose IoU is exactly 0,
# and sort by ascending IoU. Species names get underscores replaced by spaces.
#   rel_sdm_area: sdm_area as a percentage of mhs_area
#   rel_diff:     (mhs_area - sdm_area) as a percentage of mhs_area
sdm_mhs_df = (
    pd.read_csv(sdm_mhs_iou_file)
    .drop(columns=['.geo', 'system:index'])
    .loc[lambda t: t['IoU'] != 0]
    .sort_values('IoU')
    .assign(
        species=lambda t: t['species'].map(lambda name: name.replace('_', ' ')),
        rel_sdm_area=lambda t: t['sdm_area'] / t['mhs_area'] * 100,
        rel_diff=lambda t: (t['mhs_area'] - t['sdm_area']) / t['mhs_area'] * 100,
    )
)
sdm_mhs_df

In [None]:
# Mean IoU over the rows kept in `sdm_mhs_df` (rows with IoU == 0 were removed).
sdm_mhs_df['IoU'].mean()

In [None]:
# Mean of sdm_area expressed as a percentage of mhs_area.
sdm_mhs_df['rel_sdm_area'].mean()

In [None]:
# Mean of (mhs_area - sdm_area) expressed as a percentage of mhs_area.
sdm_mhs_df['rel_diff'].mean()

In [None]:
# Long-form table of the two area columns, one row per (species, source),
# relabelled 'SDM' / 'MHS' for the legend and divided by 1e12 (presumably
# m2 -> million km2, matching the axis label used when plotting — confirm).
# NOTE: this rebinds `df_melt`, which an earlier cell uses for the validation
# metrics; re-run that earlier cell before re-running the metric plots.
d = {'sdm_area': 'SDM', 'mhs_area': 'MHS'}
df_melt = (
    sdm_mhs_df
    .melt(id_vars='species', value_vars=['sdm_area', 'mhs_area'])
    .assign(
        variable=lambda t: t['variable'].map(d),
        value=lambda t: t['value'] / 1e12,
    )
)
df_melt

In [None]:
# Three horizontal bar panels sharing the species axis:
#   ax1 - range size per species for SDM and MHS
#   ax2 - IoU per species
#   ax3 - SDM range size relative to MHS (%), with a reference line at 100
fig, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3, sharey=True, figsize=(12, 4))

# Panel 1: range sizes, one bar per source.
sns.barplot(data=df_melt, x='value', y='species', hue='variable', ax=ax1)
ax1.set_xlabel('Range size (million km2)')
ax1.set_ylabel(None)
ax1.legend(title=None)

# Panel 2: overlap between the two ranges.
sns.barplot(data=sdm_mhs_df, x='IoU', y='species', color='grey', ax=ax2)
ax2.set_ylabel(None)

# Panel 3: relative SDM range size; the vertical line marks equal size.
sns.barplot(data=sdm_mhs_df, x='rel_sdm_area', y='species', color='grey', ax=ax3)
ax3.set_xlabel('Relative SDM range size (%)')
ax3.set_ylabel(None)
ax3.axvline(100, color='black')

fig.tight_layout()
fig.savefig(figuredir + 'sdm_mhs_iou.png', dpi=300)