This notebooks generates tables with model performance metrics for different subsystems in different tests, and the average ones. In order to generate the table first the data needs to be generated. To do so run the subsystem cell in the benchmark notebook.

In [18]:
from pathlib import Path
import datetime
from loguru import logger
import numpy as np

from phd_visualizations.textual import generate_latex_table

from solarmed_modeling.benchmark.utils import import_benchmark_results

%load_ext autoreload
%autoreload 2

data_path: Path = Path("../data")
results_path: Path = Path("../results/benchmark_results.json")
eval_date_str = '20250910' # '20250829' # '20250720'

# Collect all metrics of interest
metrics_of_interest = ['r2', 'mae', 'mape']

# Latex table parameters
regular_col_ids = [
    "variable",
    # "modelling_alternative",
    "sample_time",
    "test_id",
]

regular_col_labels = [
    r"Predicted\\ variable",
    # r"Modelling\\ principle",
    r"Sample\\ time\\ (s)",
    r"Test\\ date",
]

group_row_ids = [
    "variable",
    "modelling_alternative",
    "test_id",
    "sample_time"
]

metric_info = {
    "r2": r"R$^2$\\ (-)",
    "mae": r"MAE\\ (s.u.)",
    "mape": r"MAPE\\ (\%)",
    "time": r"Time\\ (s)",
}

submetric_ids = ["val", "avg_val"]
submetric_labels = ["Test", "Avg."]
group_submetric_ids = ["avg_val"]

"""

Latex table generation expected input format:
    
    data = [
        {
            "variable": "T$_{dc,out}$ ($^\\circ$C)",
            "alternative": "Physical model",
            "time": "0.035",
            "test_id": "YYYYMMDD",
            "metrics": {
                "r2": {"T": "0.98", "V": "0.97"},
                "rmse": {"T": "0.50", "V": "0.52"},
                "mae": {"T": "0.45", "V": "0.48"},
            }
        },
        ...,
        {...}
    ]

    benchmark_output = [
        eval_date_str : {
            "system": [
                "test_id": "YYYYMMDD",
                "alternative": "",
                "metrics": {},
                "metrics_per_variable": {
                    "var": {}
                },
                "elapsed_time": ,
            ] 
        }
    ]

"""

def parse_benchmark_results_to_latex_data(
    selected_alternatives: list[str],
    selected_sample_times: list[int],
    out_var_ids: list[str],
    stats: list[dict],
    out_vars_dict: dict,
    filter_out_tests: list[str] = []
) -> list[dict]:
    table_data_total = []

    # Initialize dictionary to store lists of each metric
    for selected_alternative in selected_alternatives:
        for selected_sample_time in selected_sample_times:
            logger.info(f"Processing alternative: {selected_alternative}, sample time: {selected_sample_time} s")
            for var_id in out_var_ids:
                table_data = []
                metric_values = {metric: [] for metric in metrics_of_interest + ["time"]}
                for item in stats:
                    if item["alternative"] != selected_alternative or item["sample_rate"] != selected_sample_time:
                        continue
                    if item["test_id"] in filter_out_tests:
                        logger.warning(f"Skipping test id: {item['test_id']} as it is in the filter_out_tests list")
                        continue
                    
                    metrics_dict = {}
                    for metric_id in metrics_of_interest:
                        value = item["metrics_per_variable"][var_id].get(metric_id.upper())
                        if value is not None and np.isfinite(value):
                            metric_values[metric_id].append(value)
                            metrics_dict[metric_id] = {"val": value, "avg_val": ""}
                    # Add elapsed time to metrics_dict
                    metric_values["time"].append(item["elapsed_time"])
                    metrics_dict["time"] = {"val": item["elapsed_time"], "avg_val": ""}    
                    
                    table_data.append(
                        {
                            "variable": out_vars_dict[var_id],
                            "modelling_alternative": "First Principles", 
                            "test_id": item["test_id"],
                            "metrics": metrics_dict,
                            "sample_time": selected_sample_time,
                            "alternative": selected_alternative
                        }
                    )
                    logger.info(f"Added data for variable: {var_id}, test id: {item['test_id']}, alternative: {selected_alternative}, sample time: {selected_sample_time} s")
                    
                # Compute average for each metric
                average_metrics = {metric: np.mean(values) for metric, values in metric_values.items()}
                # (Optional) Also compute standard deviation or median
                std_metrics = {metric: np.std(values) for metric, values in metric_values.items()}
                
                [
                    metric_vals.update({"avg_val": average_metrics[metric_id]}) 
                    for data in table_data for metric_id, metric_vals in data["metrics"].items()
                ]

                table_data_total.extend(table_data)
    
    return table_data_total


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Solar field

In [44]:
# Load result
from solarmed_modeling.solar_field.benchmark import model_id

out_vars_dict = {
    "Tsf_out": "T$_{sf,out}$ ($^\\circ$C)",
}
out_var_ids = list(out_vars_dict.keys())

# eval_date_str = "20250720"
selected_alternatives: list[str] = ["constant-water-props"]
selected_sample_times: list[int] = [5, 400]

stats = import_benchmark_results(results_path=results_path, model_id=model_id, eval_date_str=eval_date_str)
# stats


In [45]:
# Parse results dict into compatible format for latex table

table_data = parse_benchmark_results_to_latex_data(
    selected_alternatives=selected_alternatives,
    selected_sample_times=selected_sample_times,
    out_var_ids=out_var_ids,
    stats=stats,
    out_vars_dict=out_vars_dict,
)


[32m2025-07-20 17:04:12.034[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m101[0m - [1mProcessing alternative: constant-water-props, sample time: 5 s[0m
[32m2025-07-20 17:04:12.036[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m129[0m - [1mAdded data for variable: Tsf_out, test id: 20231030, alternative: constant-water-props, sample time: 5 s[0m
[32m2025-07-20 17:04:12.037[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m129[0m - [1mAdded data for variable: Tsf_out, test id: 20231106, alternative: constant-water-props, sample time: 5 s[0m
[32m2025-07-20 17:04:12.037[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m129[0m - [1mAdded data for variable: Tsf_out, test id: 20230630, alternative: constant-water-props, sample time: 5 s[0m
[32m2025-07-20 17:04:12.038[0m | [1mINFO    [0m | 

In [46]:
table_str = generate_latex_table(
    regular_col_ids,
    regular_col_labels,
    metric_info,
    table_data,
    submetric_ids,
    group_row_ids,
    submetric_labels,
    group_submetric_ids
)

print(table_str)


\begin{tabular}{ccccccccccccccccccccc}
\hline
\multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Predicted\\ variable\end{tabular}}} &  & \multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Sample\\ time\\ (s)\end{tabular}}} &  & \multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Test\\ date\end{tabular}}} &  & \multicolumn{15}{c}{\textbf{Performance metric}}
\\\cline{7-21}
 &  &  &  &  &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}R$^2$\\ (-)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}MAE\\ (s.u.)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}MAPE\\ (\%)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}Time\\ (s)\end{tabular}}}
 \\\cline{7-9}\cline{11-13}\cline{15-17}\cline{19-21}
 &  &  &  &  &  & Test &  & Avg. &  & Test &  & Avg. &  & Test &  & Avg. &  & Test &  & Avg.
 \\\cline{1-1}\cline{3-3}\cline{5-5}\cline{7-7}\cline{9-9}\cline{11-11}\cline{13-13}\cline{15-15}\cline{17-17}\cline{19-

### Thermal storage

In [10]:
# Load result
from solarmed_modeling.thermal_storage.benchmark import model_id

out_vars_dict = {
    "Tts_h_t": "T$_{ts,h}$ ($^\\circ$C)",
    "Tts_c_b": "T$_{ts,c}$ ($^\\circ$C)",
}
out_var_ids = list(out_vars_dict.keys())

# eval_date_str = "20250720"
selected_alternatives: list[str] = ["constant-water-props"]
selected_sample_times: list[int] = [5, 400]

# (Some tests just don't make sense, the storage is being discharge and it results in increases in temperature??)
filter_out_tests = [
    "20231106",
    "20230703",
    "20231030",
    "20231031"
]

stats = import_benchmark_results(results_path=results_path, model_id=model_id, eval_date_str=eval_date_str)
stats


[{'test_id': '20231030',
  'alternative': 'standard',
  'metrics': {'ITAE': 545494119.0963014,
   'ISE': 635732.1357225121,
   'IAE': 124111.9465098776,
   'RMSE': 3.8358803490438196,
   'MAE': 2.8725627577160022,
   'MSE': 14.713978052180535,
   'R2': 0.6330624236677767,
   'NRMSE': 0.6057537258096093,
   'MAPE': 3.3938568239624196},
  'metrics_per_variable': {'Tts_h_t': {'RMSE': 2.884866627730137,
    'MAE': 2.3534974221436697,
    'MSE': 8.322455459791053,
    'R2': -0.5325974728270904,
    'NRMSE': 1.2379812085920732,
    'MAPE': 2.5191114800507455},
   'Tts_h_m': {'RMSE': 3.4259875156950113,
    'MAE': 2.1337811810681644,
    'MSE': 11.737390457698075,
    'R2': 0.3915097525162773,
    'NRMSE': 0.780057848805922,
    'MAPE': 2.474390912832649},
   'Tts_h_b': {'RMSE': 5.662426299598112,
    'MAE': 4.631098762150047,
    'MSE': 32.06307159838036,
    'R2': -0.8032862802461804,
    'NRMSE': 1.3428649523485898,
    'MAPE': 5.532194794894417},
   'Tts_c_t': {'RMSE': 4.667420188474347,


In [11]:
# Parse results dict into compatible format for latex table

table_data = parse_benchmark_results_to_latex_data(
    selected_alternatives=selected_alternatives,
    selected_sample_times=selected_sample_times,
    out_var_ids=out_var_ids,
    stats=stats,
    out_vars_dict=out_vars_dict,
    filter_out_tests=filter_out_tests
)


[32m2025-08-30 07:49:58.516[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m102[0m - [1mProcessing alternative: constant-water-props, sample time: 5 s[0m
[32m2025-08-30 07:49:58.518[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m133[0m - [1mAdded data for variable: Tts_h_t, test id: 20230630, alternative: constant-water-props, sample time: 5 s[0m
[32m2025-08-30 07:49:58.519[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m133[0m - [1mAdded data for variable: Tts_h_t, test id: 20230508, alternative: constant-water-props, sample time: 5 s[0m
[32m2025-08-30 07:49:58.520[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m133[0m - [1mAdded data for variable: Tts_h_t, test id: 20230707, alternative: constant-water-props, sample time: 5 s[0m
[32m2025-08-30 07:49:58.520[0m | [1mINFO    [0m | 

In [12]:
table_str = generate_latex_table(
    regular_col_ids,
    regular_col_labels,
    metric_info,
    table_data,
    submetric_ids,
    group_row_ids,
    submetric_labels,
    group_submetric_ids
)

print(table_str)


\begin{tabular}{ccccccccccccccccccccc}
\hline
\multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Predicted\\ variable\end{tabular}}} &  & \multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Sample\\ time\\ (s)\end{tabular}}} &  & \multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Test\\ date\end{tabular}}} &  & \multicolumn{15}{c}{\textbf{Performance metric}}
\\\cline{7-21}
 &  &  &  &  &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}R$^2$\\ (-)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}MAE\\ (s.u.)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}MAPE\\ (\%)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}Time\\ (s)\end{tabular}}}
 \\\cline{7-9}\cline{11-13}\cline{15-17}\cline{19-21}
 &  &  &  &  &  & Test &  & Avg. &  & Test &  & Avg. &  & Test &  & Avg. &  & Test &  & Avg.
 \\\cline{1-1}\cline{3-3}\cline{5-5}\cline{7-7}\cline{9-9}\cline{11-11}\cline{13-13}\cline{15-15}\cline{17-17}\cline{19-

### Heat exchanger

In [None]:
# Load result
from solarmed_modeling.heat_exchanger.benchmark import model_id

out_vars_dict = {
    "Thx_p_out": "T$_{hx,p,out}$ ($^\\circ$C)",
    "Thx_s_out": "T$_{hx,s,out}$ ($^\\circ$C)",
}
out_var_ids = list(out_vars_dict.keys())

# eval_date_str = "20250720"
selected_alternatives: list[str] = ["constant-water-props"]
selected_sample_times: list[int] = [5, 400]

stats = import_benchmark_results(results_path=results_path, model_id=model_id, eval_date_str=eval_date_str)
# stats


ValueError: Model heat_exchanger not found in results for 20250829, available are: ['thermal_storage', 'solar_med']

In [48]:
# Parse results dict into compatible format for latex table

table_data = parse_benchmark_results_to_latex_data(
    selected_alternatives=selected_alternatives,
    selected_sample_times=selected_sample_times,
    out_var_ids=out_var_ids,
    stats=stats,
    out_vars_dict=out_vars_dict,
)


[32m2025-07-20 17:04:25.965[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m101[0m - [1mProcessing alternative: constant-water-props, sample time: 5 s[0m
[32m2025-07-20 17:04:25.966[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m129[0m - [1mAdded data for variable: Thx_p_out, test id: 20231030, alternative: constant-water-props, sample time: 5 s[0m
[32m2025-07-20 17:04:25.967[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m129[0m - [1mAdded data for variable: Thx_p_out, test id: 20231106, alternative: constant-water-props, sample time: 5 s[0m
[32m2025-07-20 17:04:25.968[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m129[0m - [1mAdded data for variable: Thx_p_out, test id: 20230630, alternative: constant-water-props, sample time: 5 s[0m
[32m2025-07-20 17:04:25.968[0m | [1mINFO    

In [49]:
table_str = generate_latex_table(
    regular_col_ids,
    regular_col_labels,
    metric_info,
    table_data,
    submetric_ids,
    group_row_ids,
    submetric_labels,
    group_submetric_ids
)

print(table_str)


\begin{tabular}{ccccccccccccccccccccc}
\hline
\multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Predicted\\ variable\end{tabular}}} &  & \multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Sample\\ time\\ (s)\end{tabular}}} &  & \multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Test\\ date\end{tabular}}} &  & \multicolumn{15}{c}{\textbf{Performance metric}}
\\\cline{7-21}
 &  &  &  &  &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}R$^2$\\ (-)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}MAE\\ (s.u.)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}MAPE\\ (\%)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}Time\\ (s)\end{tabular}}}
 \\\cline{7-9}\cline{11-13}\cline{15-17}\cline{19-21}
 &  &  &  &  &  & Test &  & Avg. &  & Test &  & Avg. &  & Test &  & Avg. &  & Test &  & Avg.
 \\\cline{1-1}\cline{3-3}\cline{5-5}\cline{7-7}\cline{9-9}\cline{11-11}\cline{13-13}\cline{15-15}\cline{17-17}\cline{19-

### MED

### SolarMED

In [23]:
# Load result
from solarmed_modeling.solar_med.benchmark import model_id

# ["Tsf_in", "Tsf_out", "Thx_s_out", *Th_labels, *Tc_labels, "qmed_d", "qmed_c"]
metrics_of_interest = ['mae', 'mape']
if "r2" in metric_info.keys():
    metric_info.pop("r2")

out_vars_dict = {
    "Tsf_in": "T$_{sf,in}$ ($^\\circ$C)",
    "Tsf_out": "T$_{sf,out}$ ($^\\circ$C)",
    "Tts_h_t": "T$_{ts,h}$ ($^\\circ$C)",
    "Tts_c_b": "T$_{ts,c}$ ($^\\circ$C)",
    "qmed_d": "q$_{med,d}$ (m$^3$/h)",
    # "Tmed_s_out": "T$_{med,s,out}$ ($^\\circ$C)",
    # "qmed_c": "q$_{med,c}$ (m$^3$/h)",
    # "Pth_sf": "$\dot{Q}_{sf}$ (kW$_{th}$)",
    # "Pth_ts_src": "$\dot{Q}_{ts,src}$ (kW$_{th}$)",
    # "Pth_ts_dis": "$\dot{Q}_{med,s}$ (kW$_{th}$)",
    # "Ets_h": "Q$_{ts,h}$ (kWh$_{th}$)",
    # "Ets_c": "Q$_{ts,c}$ (kWh$_{th}$)",
}
out_var_ids = list(out_vars_dict.keys())

# eval_date_str = "20250720"
selected_alternatives: list[str] = ["constant-water-props"]
selected_sample_times: list[int] = [400]

filter_out_tests = [
    "20230505",
    "20231031",
    "20230703",
    "20230508",
    "20230630",
    "20230629"
]

regular_col_ids = [
    "variable",
    # "modelling_alternative",
    # "sample_time",
    "test_id",
]
regular_col_labels = [
    r"Predicted\\ variable",
    # r"Modelling\\ principle",
    # r"Sample\\ time\\ (s)",
    r"Test\\ date",
]


stats = import_benchmark_results(results_path=results_path, model_id=model_id, eval_date_str=eval_date_str)
# stats


In [24]:
# Parse results dict into compatible format for latex table

table_data = parse_benchmark_results_to_latex_data(
    selected_alternatives=selected_alternatives,
    selected_sample_times=selected_sample_times,
    out_var_ids=out_var_ids,
    stats=stats,
    out_vars_dict=out_vars_dict,
)


[32m2025-09-10 16:51:57.164[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m102[0m - [1mProcessing alternative: constant-water-props, sample time: 400 s[0m
[32m2025-09-10 16:51:57.166[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m133[0m - [1mAdded data for variable: Tsf_in, test id: 20231030, alternative: constant-water-props, sample time: 400 s[0m
[32m2025-09-10 16:51:57.166[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m133[0m - [1mAdded data for variable: Tsf_in, test id: 20231106, alternative: constant-water-props, sample time: 400 s[0m
[32m2025-09-10 16:51:57.168[0m | [1mINFO    [0m | [36m__main__[0m:[36mparse_benchmark_results_to_latex_data[0m:[36m133[0m - [1mAdded data for variable: Tsf_out, test id: 20231030, alternative: constant-water-props, sample time: 400 s[0m
[32m2025-09-10 16:51:57.168[0m | [1mINFO    

In [25]:
table_str = generate_latex_table(
    regular_col_ids,
    regular_col_labels,
    metric_info,
    table_data,
    submetric_ids,
    group_row_ids,
    submetric_labels,
    group_submetric_ids
)

print(table_str)


\begin{tabular}{ccccccccccccccc}
\hline
\multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Predicted\\ variable\end{tabular}}} &  & \multirow{3}{*}{\textbf{\begin{tabular}[c]{@{}c@{}}Test\\ date\end{tabular}}} &  & \multicolumn{11}{c}{\textbf{Performance metric}}
\\\cline{5-15}
 &  &  &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}MAE\\ (s.u.)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}MAPE\\ (\%)\end{tabular}}} &  & \multicolumn{3}{c}{\textbf{\begin{tabular}[c]{@{}c@{}}Time\\ (s)\end{tabular}}}
 \\\cline{5-7}\cline{9-11}\cline{13-15}
 &  &  &  & Test &  & Avg. &  & Test &  & Avg. &  & Test &  & Avg.
 \\\cline{1-1}\cline{3-3}\cline{5-5}\cline{7-7}\cline{9-9}\cline{11-11}\cline{13-13}\cline{15-15}
\multirow{2}{*}{T$_{sf,in}$ ($^\circ$C)} &  & 20231030 &  & 6.43 &  & \multirow{2}{*}{5.57} &  & 13.48 &  & \multirow{2}{*}{12.81} &  & 2.00 &  & \multirow{10}{*}{2.47} \\
 &  & 20231106 &  & 4.71 &  &  &  & 12.13 &  &  &  & 2.95 &  &  \\
\multirow{2}