# Analyzing Benchmark Results for Validation

In [1]:
import sys
# This allows us to import the nucml utilities
sys.path.append("..")

In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 50)
pd.options.mode.chained_assignment = None  # default='warn'
sns.set_style("white")

import nucml.exfor.data_utilities as exfor_utils
import nucml.evaluation.data_utilities as endf_utils
import nucml.plot.utilities as plot_utils
import nucml.datasets as nuc_data
import nucml.ace.data_utilities as ace_utils
import nucml.model.building_utils as model_building
import nucml.model.utilities as model_utils
import nucml.general_utilities as gen_utils
import nucml.ace.plot as ace_plots

In [3]:
import importlib
importlib.reload(exfor_utils)
importlib.reload(gen_utils)
importlib.reload(endf_utils)
importlib.reload(plot_utils)
importlib.reload(nuc_data)
importlib.reload(ace_utils)
importlib.reload(model_building)
importlib.reload(model_utils)
importlib.reload(ace_plots)
print("Finish re-loading scripts.")

Finish re-loading scripts.


In [4]:
figure_dir = "figures/B0/"

In [5]:
sns.set(font_scale=2.5)
sns.set_style('white')

## Gathering Results from Benchmark Calculations

In [6]:
# model_results_b0 = ace_utils.gather_benchmark_results("ml/XGB_B0/")
model_results_b1 = ace_utils.gather_benchmark_results("ml/XGB_B1/")
# model_results_b2 = ace_utils.gather_benchmark_results("ml/XGB_B2/")
# model_results_b3 = ace_utils.gather_benchmark_results("ml/XGB_B3/")
# model_results_b4 = ace_utils.gather_benchmark_results("ml/XGB_B4/")

## Analyzing Decision Tree Results

In [7]:
# results_b0 = pd.read_csv("../ML_EXFOR_neutrons/3_XGB/xgb_resultsB0.csv")
results_b1 = pd.read_csv("../ML_EXFOR_neutrons/3_XGB/xgb_resultsB1.csv")
# results_b2 = pd.read_csv("../ML_EXFOR_neutrons/3_XGB/xgb_resultsB2.csv")
# results_b3 = pd.read_csv("../ML_EXFOR_neutrons/3_XGB/xgb_resultsB3.csv")
# results_b4 = pd.read_csv("../ML_EXFOR_neutrons/3_XGB/xgb_resultsB4.csv")

In [9]:
results_b1['Model'] = results_b1.model_path.apply(lambda x: os.path.basename(os.path.dirname(x)))
results_b1['dataset'] = 'b0'

In [11]:
# for df, dataset_tag in zip([results_b0, results_b1, results_b2, results_b3, results_b4], ["b0", "b1", "b2", "b3", "b4"]):
#     df['Model'] = df.model_path.apply(lambda x: os.path.basename(os.path.dirname(x)))
#     df['dataset'] = dataset_tag

In [11]:
# results_b0 = results_b0[["Model", "train_mae", "val_mae", "test_mae", "max_bin", "max_depth", "l2", "dataset"]]
results_b1 = results_b1[["Model", "train_mae", "val_mae", "test_mae", "max_bin", "max_depth", "l2", "dataset"]]
# results_b2 = results_b2[["Model", "train_mae", "val_mae", "test_mae", "max_bin", "max_depth", "l2", "dataset"]]
# results_b3 = results_b3[["Model", "train_mae", "val_mae", "test_mae", "max_bin", "max_depth", "l2", "dataset"]]
# results_b4 = results_b4[["Model", "train_mae", "val_mae", "test_mae", "max_bin", "max_depth", "l2", "dataset"]]

In [12]:
# final_b0 = model_results_b0.merge(results_b0, on="Model")
final_b1 = model_results_b1.merge(results_b1, on="Model")
# final_b2 = model_results_b2.merge(results_b2, on="Model")
# final_b3 = model_results_b3.merge(results_b3, on="Model")
# final_b4 = model_results_b4.merge(results_b4, on="Model")

In [14]:
# final_set = final_b0.append(final_b1).append(final_b2).append(final_b3).append(final_b4)

# u233_002_001 = final_set[final_set.Benchmark == "U233_MET_FAST_002_001"].sort_values(by="Deviation_Ana")
# u233_002_002 = final_set[final_set.Benchmark == "U233_MET_FAST_002_002"].sort_values(by="Deviation_Ana")
# u233_001 = final_set[final_set.Benchmark == "U233_MET_FAST_001"].sort_values(by="Deviation_Ana")

In [14]:
u233_002_001 = final_b1[final_b1.Benchmark == "U233_MET_FAST_002_001"].sort_values(by="Deviation_Ana")
u233_002_002 = final_b1[final_b1.Benchmark == "U233_MET_FAST_002_002"].sort_values(by="Deviation_Ana")
u233_001 = final_b1[final_b1.Benchmark == "U233_MET_FAST_001"].sort_values(by="Deviation_Ana")

In [31]:
u233_001.Deviation_Ana = u233_001.Deviation_Ana * 100
u233_002_001.Deviation_Ana = u233_002_001.Deviation_Ana * 100
u233_002_002.Deviation_Ana = u233_002_002.Deviation_Ana * 100

# U233_001

In [32]:
print(model_utils.get_best_models_df(u233_001[["Model", 'K_eff_ana', 'Unc_ana', 'Deviation_Ana', 'train_mae', 'val_mae', 'test_mae']]).to_latex(index=False))

\begin{tabular}{lrrrrrrl}
\toprule
                           Model &  K\_eff\_ana &  Unc\_ana &  Deviation\_Ana &  train\_mae &  val\_mae &  test\_mae &   tag \\
\midrule
XGB30\_L0\_MB30000\_none\_rmse\_B1\_v1 &   1.004940 &  0.00044 &         0.4940 &   0.065501 & 0.121320 &  0.122416 & Train \\
XGB30\_L1\_MB20000\_none\_rmse\_B1\_v1 &   0.977935 &  0.00044 &         2.2065 &   0.079715 & 0.114399 &  0.115458 &   Val \\
XGB30\_L3\_MB10000\_none\_rmse\_B1\_v1 &   0.973408 &  0.00043 &         2.6592 &   0.093639 & 0.115003 &  0.114198 &  Test \\
\bottomrule
\end{tabular}



In [34]:
print(u233_001[["Model", 'K_eff_ana', 'Unc_ana', 'Deviation_Ana', 'train_mae', 'val_mae', 'test_mae']].head(1).to_latex(index=False))

\begin{tabular}{lrrrrrr}
\toprule
                           Model &  K\_eff\_ana &  Unc\_ana &  Deviation\_Ana &  train\_mae &  val\_mae &  test\_mae \\
\midrule
XGB10\_L3\_MB20000\_none\_rmse\_B1\_v1 &   0.997123 &  0.00043 &         0.2877 &   0.196566 & 0.197323 &   0.19741 \\
\bottomrule
\end{tabular}



# U233_002_001

In [45]:
print(model_utils.get_best_models_df(u233_002_001[["Model", 'K_eff_ana', 'Unc_ana', 'Deviation_Ana', 'train_mae', 'val_mae', 'test_mae']]).to_latex(index=False))

\begin{tabular}{lrrrrrrl}
\toprule
                           Model &  K\_eff\_ana &  Unc\_ana &  Deviation\_Ana &  train\_mae &  val\_mae &  test\_mae &   tag \\
\midrule
XGB30\_L0\_MB30000\_none\_rmse\_B1\_v1 &   1.006340 &  0.00044 &         0.6340 &   0.065501 & 0.121320 &  0.122416 & Train \\
XGB30\_L1\_MB20000\_none\_rmse\_B1\_v1 &   0.980368 &  0.00046 &         1.9632 &   0.079715 & 0.114399 &  0.115458 &   Val \\
XGB30\_L3\_MB10000\_none\_rmse\_B1\_v1 &   0.975967 &  0.00044 &         2.4033 &   0.093639 & 0.115003 &  0.114198 &  Test \\
\bottomrule
\end{tabular}



In [46]:
print(u233_002_001[["Model", 'K_eff_ana', 'Unc_ana', 'Deviation_Ana', 'train_mae', 'val_mae', 'test_mae']].head(1).to_latex(index=False))

\begin{tabular}{lrrrrrr}
\toprule
                           Model &  K\_eff\_ana &  Unc\_ana &  Deviation\_Ana &  train\_mae &  val\_mae &  test\_mae \\
\midrule
XGB10\_L3\_MB20000\_none\_rmse\_B1\_v1 &     1.0011 &  0.00044 &           0.11 &   0.196566 & 0.197323 &   0.19741 \\
\bottomrule
\end{tabular}



# U233_002_002

In [47]:
print(model_utils.get_best_models_df(u233_002_002[["Model", 'K_eff_ana', 'Unc_ana', 'Deviation_Ana', 'train_mae', 'val_mae', 'test_mae']]).to_latex(index=False))

\begin{tabular}{lrrrrrrl}
\toprule
                           Model &  K\_eff\_ana &  Unc\_ana &  Deviation\_Ana &  train\_mae &  val\_mae &  test\_mae &   tag \\
\midrule
XGB30\_L0\_MB30000\_none\_rmse\_B1\_v1 &   1.008750 &  0.00039 &         0.8750 &   0.065501 & 0.121320 &  0.122416 & Train \\
XGB30\_L1\_MB20000\_none\_rmse\_B1\_v1 &   0.985785 &  0.00044 &         1.4215 &   0.079715 & 0.114399 &  0.115458 &   Val \\
XGB30\_L3\_MB10000\_none\_rmse\_B1\_v1 &   0.981589 &  0.00046 &         1.8411 &   0.093639 & 0.115003 &  0.114198 &  Test \\
\bottomrule
\end{tabular}



In [48]:
print(u233_002_002[["Model", 'K_eff_ana', 'Unc_ana', 'Deviation_Ana', 'train_mae', 'val_mae', 'test_mae']].head(1).to_latex(index=False))

\begin{tabular}{lrrrrrr}
\toprule
                           Model &  K\_eff\_ana &  Unc\_ana &  Deviation\_Ana &  train\_mae &  val\_mae &  test\_mae \\
\midrule
XGB10\_L2\_MB40000\_none\_rmse\_B1\_v1 &   0.998213 &  0.00042 &         0.1787 &   0.196634 & 0.197023 &  0.197088 \\
\bottomrule
\end{tabular}



# Best Models Overall

In [49]:
import numpy as np

In [50]:
np.array([0.998075, 1.00086, 1.000200]).mean()

0.9997116666666667

In [51]:
ERROR = 0.0288333

In [52]:
model_mean = final_b1.groupby("Model").mean()

In [53]:
model_mean = model_mean[['K_eff_ana']]

In [54]:
model_mean["Error"] = (abs(model_mean.K_eff_ana - 1) /1) * 100

In [55]:
model_mean = model_mean.reset_index()

In [56]:
model_mean.sort_values("Error").head()

Unnamed: 0,Model,K_eff_ana,Error
15,XGB10_L3_MB20000_none_rmse_B1_v1,1.001158,0.115767
0,XGB10_L0_MB10000_none_rmse_B1_v1,0.998326,0.167433
19,XGB10_L3_MB50000_none_rmse_B1_v1,0.998114,0.188633
12,XGB10_L2_MB40000_none_rmse_B1_v1,0.995217,0.478333
30,XGB20_L2_MB30000_none_rmse_B1_v1,0.994857,0.514267


In [57]:
model_mean = model_mean[model_mean.Error < ERROR]

In [58]:
print(model_mean.sort_values("Error").to_latex(index=False))

\begin{tabular}{lrr}
\toprule
Empty DataFrame
Columns: Index(['Model', 'K\_eff\_ana', 'Error'], dtype='object')
Index: Int64Index([], dtype='int64') \\
\bottomrule
\end{tabular}

