We want to understand the performance of the neural networks, and how it
varies across weight initializations and input data. To do this we need
to load our test results, calculate some error metrics, and compare them
against a baseline.

``` python
import os
import pandas as pd
import seaborn as sns
from sklearn.metrics import root_mean_squared_error
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
```

Set up some lists to keep track of error values.

``` python
# Directory that holds the per-network test result CSV files.
results_dir = Path("../../results")

# Accumulators filled while the result files are read: `dfs` collects the
# per-file tables, every other list receives one entry per file.
dfs, seeds, datas, values, count = [], [], [], [], []
rmses, nrmses = [], []
mean_perc_errs, mean_perc_errs_std = [], []
```

Now load every results file (each one holds the test performance of a
specific network), both to merge them together and to calculate the errors.

``` python
# Read every D/K result CSV directly inside `results_dir`: tag its rows with
# the identifiers parsed from the file name, keep the table for the merged
# frame, and record one set of summary errors per file.
for f in os.listdir(results_dir):
    if not f.endswith('.csv'):
        continue
    # The file name is split on '_': the second-to-last token is used as the
    # seed, and everything before it is the input-data identifier.
    parts = f.split('_')
    seed = parts[-2]
    n_parts = len(parts[:-2])
    data = '_'.join(parts[:-2])
    df = pd.read_csv(results_dir / f)
    # Scalars broadcast to every row of the table.
    df['seed'] = seed
    df['data'] = data
    # Use the same column name ('value') as the K/D loader and `rmse_df`, so
    # the concatenated frame has a single target column instead of two
    # half-empty ones.
    df['value'] = "D/K"
    df['parts'] = n_parts
    count.append(n_parts)
    dfs.append(df)
    # Absolute error relative to the true label; this is a fraction (it is
    # not multiplied by 100).
    # NOTE(review): the denominator is not wrapped in abs(), so this assumes
    # positive true labels -- confirm.
    perc_err = np.abs(df['true_labels'] - df['predictions'])/df['true_labels']
    mean_perc_errs.append(np.mean(perc_err))
    mean_perc_errs_std.append(np.std(perc_err))
    rmse = root_mean_squared_error(df['true_labels'], df['predictions'])
    rmses.append(rmse)
    # RMSE normalised by the range (max - min) of the true labels.
    nrmse = rmse/(np.ptp(df['true_labels']))
    nrmses.append(nrmse)
    seeds.append(seed)
    datas.append(data)
    values.append("D/K")
```

``` python
# Same bookkeeping for the K/D networks, whose result CSVs live in the
# "KD_results" sub-directory of `results_dir`.
kd_dir = results_dir / "KD_results"
for f in os.listdir(kd_dir):
    if not f.endswith('.csv'):
        continue
    # File name split on '_': second-to-last token -> seed, everything
    # before it -> input-data identifier.
    parts = f.split('_')
    seed = parts[-2]
    data_tokens = parts[:-2]
    data = '_'.join(data_tokens)
    df = pd.read_csv(kd_dir / f)

    # Tag every row of this table with the identifiers of its network.
    df['seed'] = seed
    df['data'] = data
    df['value'] = "K/D"
    df['parts'] = len(data_tokens)
    dfs.append(df)

    # Per-file summary errors.
    truth = df['true_labels']
    predicted = df['predictions']
    perc_err = np.abs(truth - predicted)/truth
    mean_perc_err = np.mean(perc_err)
    mean_perc_err_std = np.std(perc_err)
    rmse = root_mean_squared_error(truth, predicted)
    # RMSE normalised by the range (max - min) of the true labels.
    nrmse = rmse/(np.ptp(truth))

    # One entry per file in each accumulator.
    count.append(len(data_tokens))
    mean_perc_errs.append(mean_perc_err)
    mean_perc_errs_std.append(mean_perc_err_std)
    rmses.append(rmse)
    nrmses.append(nrmse)
    seeds.append(seed)
    datas.append(data)
    values.append("K/D")
```

Now create our dataframes for analysis

``` python
# Long table: the rows of every loaded result file stacked together.
all_df = pd.concat(dfs)

# Summary table: one row per loaded file, built from the accumulators.
rmse_df = pd.DataFrame(
    dict(
        data=datas,
        seed=seeds,
        nrmse=nrmses,
        rmse=rmses,
        parts=count,
        mean_perc_err=mean_perc_errs,
        mean_perc_err_std=mean_perc_errs_std,
        value=values,
    )
)

# Display labels for the input-data identifiers; identifiers missing from
# this mapping end up as NaN in the 'labels' column.
label_map = {
    'dem': 'Elevation',
    'slope': 'Slope',
    'curvature': 'Curvature',
    'accumulation': 'Flow Accumulation',
    'logaccumulation': '$\\log_{10}(\\text{Flow Accumulation})$',
    'dem_dem': 'Elevation, Elevation',
    'dem_slope': 'Elevation, Slope',
    'dem_curvature': 'Elevation, Curvature',
    'dem_accumulation': 'Elevation, Flow Accumulation',
}
for frame in (rmse_df, all_df):
    frame['labels'] = frame['data'].map(label_map)

# Mean NRMSE of the single-input 'dem' networks and of the two-input
# 'dem_dem' networks.
is_dem_1part = (rmse_df['data'] == 'dem') & (rmse_df['parts'] == 1)
is_dem_2part = (rmse_df['data'] == 'dem_dem') & (rmse_df['parts'] == 2)
dem_1part = rmse_df.loc[is_dem_1part, 'nrmse'].mean()
dem_2part = rmse_df.loc[is_dem_2part, 'nrmse'].mean()
```

Now we use the DEM based network, and the DEMx2 based networks as
baselines.

``` python
# Express every network's NRMSE as a percentage improvement over two
# baselines: the mean NRMSE of the single-input 'dem' D/K networks and the
# mean NRMSE of the two-input 'dem_dem' D/K networks.
# Each comparison needs its own parentheses: `&` binds more tightly than
# `==`, so an unparenthesised `... & rmse_df['value'] == "D/K"` compares the
# combined mask with the string instead of filtering on the 'value' column.
is_dk = rmse_df['value'] == "D/K"
dem_baseline = rmse_df[(rmse_df['data'] == 'dem') & (rmse_df['parts'] == 1) & is_dk]['nrmse'].mean()
dem_dem_baseline = rmse_df[(rmse_df['data'] == 'dem_dem') & (rmse_df['parts'] == 2) & is_dk]['nrmse'].mean()
baseline_names = ['dem_baseline', 'dem_dem_baseline']
baselines = [dem_baseline, dem_dem_baseline]

# Positive values mean a lower NRMSE than the baseline. One list per
# baseline, with one entry per row of `rmse_df`.
improvements = [
    ((baseline - rmse_df['nrmse']) / baseline * 100).tolist()
    for baseline in baselines
]

# Store each list under the matching baseline name.
for name, improvement in zip(baseline_names, improvements):
    rmse_df[name] = improvement
```

Now we save the dataframes for plotting later.

``` python
from pathlib import Path

# Output directory and the two CSV files written into it.
analysis_dir = Path("../../analysis")
rmse_path = analysis_dir / "overall_performance.csv"
test_path = analysis_dir / "all_test_performance.csv"

# Create the directory if it is missing, then write the summary table
# and the merged test table.
analysis_dir.mkdir(exist_ok=True)
rmse_df.to_csv(rmse_path)
all_df.to_csv(test_path)
```

``` python
error_fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(rmse_df['data'],
       rmse_df['mean_perc_err'],
       yerr=rmse_df['mean_perc_err_std'], capsize=5)
ax.set_ylabel('Mean Percentage Error (%)')
ax.set_title('Mean Percentage Error by Model Type')
error_fig.tight_layout()
error_fig.show()
error_fig.savefig("../../results/figures/mean_percentage_error.png",
                  dpi=300)
```

``` python
improvement_fig, axs = plt.subplots(1, 2, figsize=(12, 6))
ax1, ax2 = axs
p1_sorted_idx = np.argsort(p1_improvements)
p1_improvements = [p1_improvements[i] for i in p1_sorted_idx]
p1_lower_errors = [p1_lower_errors[i] for i in p1_sorted_idx]
p1_upper_errors = [p1_upper_errors[i] for i in p1_sorted_idx]
p1_labels = [p1_labels[i] for i in p1_sorted_idx]
```

``` python
ax1.barh(p1_labels, p1_improvements,
         xerr=[p1_lower_errors, p1_upper_errors], capsize=5)
ax1.axvline(0, color='black', linestyle='--', linewidth=1)
ax1.set_xlabel('% Improvement over Baseline')
ax1.set_title('Performance Improvement vs Baseline DEM Model')
ax1.grid(axis='x', alpha=0.3)
```

``` python
p2_sorted_idx = np.argsort(p2_improvements)
p2_improvements = [p2_improvements[i] for i in p2_sorted_idx]
p2_lower_errors = [p2_lower_errors[i] for i in p2_sorted_idx]
p2_upper_errors = [p2_upper_errors[i] for i in p2_sorted_idx]
p2_labels = [p2_labels[i] for i in p2_sorted_idx]
```

``` python
ax2.barh(p2_labels, p2_improvements,
         xerr=[p2_lower_errors, p2_upper_errors], capsize=5)
ax2.axvline(0, color='black', linestyle='--', linewidth=1)
ax2.set_xlabel('% Improvement over Baseline')
ax2.set_title('Performance Improvement vs Baseline DEM Model')
ax2.grid(axis='x', alpha=0.3)
improvement_fig.tight_layout()
improvement_fig.show()
```

``` python
raw_performance_fig, axs = plt.subplots(1, 2, figsize=(14, 6))

part1_data = all_df[all_df['parts'] == 1]
sns.scatterplot(data=part1_data,
                x='true_labels', y='predictions', hue='data', style='seed',
                alpha=0.5, s=20, ax=axs[0])
```

``` python
axs[0].set_xscale('log')
axs[0].set_yscale('log')
axs[0].set_xlabel('True D/K (log scale)')
axs[0].set_ylabel('Predicted D/K (log scale)')
axs[0].set_title('1-Part Model Predictions')
axs[0].legend(bbox_to_anchor=(1.05, 1), loc='upper left')
axs[0].grid(alpha=0.3)
```

``` python
min_val = min(part1_data['true_labels'].min(),
              part1_data['predictions'].min())
max_val = max(part1_data['true_labels'].max(),
              part1_data['predictions'].max())
axs[0].plot([min_val, max_val],
            [min_val, max_val], 'k--', linewidth=2, label='1:1 line')#, zorder=0)
```

``` python
ax02 = axs[0].twinx()
ax02.hist(part1_data['true_labels'], bins=50,
          color='gray', alpha=0.3)
```

``` python
part2_data = all_df[all_df['parts'] == 2]
sns.scatterplot(data=part2_data,
                x='true_labels', y='predictions', hue='data', style='seed',
                alpha=0.5, s=20, ax=axs[1])
```

``` python
axs[1].set_xscale('log')
axs[1].set_yscale('log')
axs[1].set_xlabel('True D/K (log scale)')
axs[1].set_ylabel('Predicted D/K (log scale)')
axs[1].set_title('2-Part Model Predictions')
axs[1].legend(bbox_to_anchor=(1.05, 1), loc='upper left')
axs[1].grid(alpha=0.3)
```

``` python
min_val = min(part2_data['true_labels'].min(),
              part2_data['predictions'].min())
max_val = max(part2_data['true_labels'].max(),
              part2_data['predictions'].max())
axs[1].plot([min_val, max_val],
            [min_val, max_val], 'k--', linewidth=2, label='1:1 line')#, zorder=0)
ax12 = axs[1].twinx()
ax12.hist(part2_data['true_labels'], bins=50,
          color='gray', alpha=0.3)
```

``` python
raw_performance_fig.tight_layout()
raw_performance_fig.show()
```

``` python
def moving_window_nrmse(y_true, y_pred, window_frac=0.2):
    """Calculate NRMSE in a moving window along sorted true values"""
    sorted_idx = np.argsort(y_true)
    y_true_sorted = y_true[sorted_idx]
    y_pred_sorted = y_pred[sorted_idx]

    window_size = int(len(y_true) * window_frac)
    nrmses = []
    centers = []

    for i in range(len(y_true) - window_size):
        window_true = y_true_sorted[i:i+window_size]
        window_pred = y_pred_sorted[i:i+window_size]

        rmse = root_mean_squared_error(window_true, window_pred)#np.sqrt(np.mean((window_true - window_pred)**2))
        nrmse = rmse / np.ptp(window_true)

        nrmses.append(nrmse)
        centers.append(np.mean(window_true))

    return np.array(centers), np.array(nrmses)
```

``` python
window_nrmse_fig, (ax5, ax6) = plt.subplots(1, 2, figsize=(14, 6))
one_part_data = all_df[all_df['parts'] == 1]
for data_type in sorted(one_part_data['data'].unique()):
    data_subset = all_df[all_df['data'] == data_type]

    all_centers = []
    all_nrmses = []

    for seed in data_subset['seed'].unique():
        seed_data = data_subset[data_subset['seed'] == seed]
        centers, nrmses = moving_window_nrmse(
            seed_data['true_labels'].values,
            seed_data['predictions'].values, window_frac=0.1 )
        all_centers.append(centers)
        all_nrmses.append(nrmses)

    if len(all_nrmses) > 0:
        mean_nrmse = np.mean(all_nrmses, axis=0)
        mean_centers = np.mean(all_centers, axis=0)

        ax5.plot(mean_centers, mean_nrmse, linewidth=2,
                 label=data_type)

ax5.set_xlabel('True D/K Value')
ax5.set_ylabel('Local NRMSE')
ax5.set_title('Rolling NRMSE: 1-Part Models')
ax5.legend()
ax5.grid(alpha=0.3)
```

``` python
two_part_data = all_df[all_df['parts'] == 2]
for data_type in sorted(two_part_data['data'].unique()):
    data_subset = all_df[all_df['data'] == data_type]

    all_centers = []
    all_nrmses = []

    for seed in data_subset['seed'].unique():
        seed_data = data_subset[data_subset['seed'] == seed]
        centers, nrmses = moving_window_nrmse(
            seed_data['true_labels'].values,
            seed_data['predictions'].values, window_frac=0.1 )
        all_centers.append(centers)
        all_nrmses.append(nrmses)

    if len(all_nrmses) > 0:
        mean_nrmse = np.mean(all_nrmses, axis=0)
        mean_centers = np.mean(all_centers, axis=0)

        ax6.plot(mean_centers, mean_nrmse, linewidth=2,
                 label=data_type)

ax6.set_xlabel('True D/K Value')
ax6.set_ylabel('Local NRMSE')
ax6.set_title('Rolling NRMSE: 2-Part Models')
ax6.legend()
ax6.grid(alpha=0.3)
ax5.set_xscale('log')
ax6.set_xscale('log')
```

``` python
window_nrmse_fig.tight_layout()
window_nrmse_fig.show()
```

``` python
improvement_fig.savefig("../../results/figures/improvement_over_baseline.png",
                        dpi=300)
raw_performance_fig.savefig("../../results/figures/raw_performance_scatter.png",
                            dpi=300)
window_nrmse_fig.savefig("../../results/figures/rolling_nrmse.png",
                         dpi=300)
```