In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt

from symr.helpers import r2_over_threshold, r2_auc, plot_r2_over_threshold

import pandas as pd

In [None]:
# Configuration: edit the values below to analyse a different set of result CSVs.

# Substring used to pick which files of the results directory are analysed.
string_tag = "860200_partial.csv"

# The results directory is resolved relative to the current working directory.
root_path = ".."
results_path = os.path.join(root_path, "results")

In [None]:
def _valid_r2_scores(failed_column, r2_column):
    """Select the usable R^2 scores for the trials of a single expression.

    A trial counts as a success only when its ``failed`` entry reads "False"
    and its R^2 entry parses to a finite number; entries such as " None",
    NaN or inf mark the trial as unsuccessful.

    Args:
        failed_column: pandas Series holding the ``failed`` flag of each trial.
        r2_column: pandas Series holding the matching R^2 entries
            (``in_r2`` or ``ex_r2``).

    Returns:
        Tuple ``(scores, succeeded)``: a float array with the R^2 values of the
        successful trials, and a boolean array with one entry per trial.
    """
    # Surrounding whitespace is stripped so " False" and "False" are treated alike.
    succeeded = failed_column.astype(str).str.strip().eq("False").to_numpy(dtype=bool)

    # errors="coerce" maps unparsable entries (e.g. " None") to NaN instead of
    # raising, so they are rejected by the finiteness test below.
    scores = pd.to_numeric(
        r2_column.astype(str).str.strip(), errors="coerce"
    ).to_numpy(dtype=float)

    # Combine with a boolean mask. The previous ``success[1 - np.isfinite(...)]``
    # built an integer index array, which only ever touched trials 0 and 1.
    succeeded = succeeded & np.isfinite(scores)
    return scores[succeeded], succeeded


# Sorted so the order of plots and table rows is reproducible across runs.
list_dir = sorted(os.listdir(results_path))

# Result files selected for analysis: those whose name contains string_tag.
active_dir = [filename for filename in list_dir if string_tag in filename]

# Summary statistics per result file, keyed by os.path.splitext(filename).
r2_auc_table = {}

for filename in active_dir:

    # Per-expression results for this file (ex_* from the "ex_r2" column,
    # in_* from the "in_r2" column).
    ex_r2_aucs = {}
    in_r2_aucs = {}

    ex_r2_ots = {}
    in_r2_ots = {}

    ex_success = {}
    in_success = {}

    filepath = os.path.join(results_path, filename)

    df = pd.read_csv(filepath)
    # NOTE(review): the first unique expression is skipped, as in the original
    # code; the reason is not visible here -- confirm this is intended.
    expressions = df[" expression"].unique()[1:]

    for expr in expressions:

        # All trials recorded for this expression.
        trials = df.loc[df[" expression"] == expr]

        in_r2s, in_ok = _valid_r2_scores(trials["failed"], trials["in_r2"])
        in_r2_ot = r2_over_threshold(in_r2s)
        in_r2_auc = r2_auc(in_r2_ot)
        in_success[expr] = np.mean(in_ok)

        # Report expressions whose ex_r2 column contains "None" entries.
        # (``" None" in series`` tested the index, so it never matched.)
        if trials["ex_r2"].astype(str).str.strip().eq("None").any():
            print(expr)

        ex_r2s, ex_ok = _valid_r2_scores(trials["failed"], trials["ex_r2"])
        ex_r2_ot = r2_over_threshold(ex_r2s)
        ex_r2_auc = r2_auc(ex_r2_ot)
        ex_success[expr] = np.mean(ex_ok)

        ex_r2_aucs[expr] = ex_r2_auc
        in_r2_aucs[expr] = in_r2_auc

        ex_r2_ots[expr] = ex_r2_ot
        in_r2_ots[expr] = in_r2_ot

    # The method name is the part of the file name before the first underscore;
    # "_b1_" in the file name marks runs that used BFGS.
    method = filename.split("_")[0]
    bfgs_used = "_b1_" in filename
    wo = "w/" if bfgs_used else "w/o"

    # One R^2-over-threshold figure per expression.
    for key in ex_r2_ots:

        title_key = key.replace("**","^")
        my_title = f"$R^2$ Curve \n {method} {wo} BFGS \n ${title_key}$"
        fig, ax = plot_r2_over_threshold(in_r2_ots[key], ex_r2_ots[key], title=my_title)
        plt.legend(fontsize=16)
        plt.show()

    # Kept as the (root, ext) tuple returned by splitext: the table-building
    # cells look the file name up through key[0].
    table_key = os.path.splitext(filename)

    in_r2_auc_list = list(in_r2_aucs.values())
    ex_r2_auc_list = list(ex_r2_aucs.values())
    # The failure rate is derived from the ex_r2 success mask.
    failure_list = [1.0 - rate for rate in ex_success.values()]

    nice_name = f"{method} {wo} BFGS"

    table_columns = {
        "nice_name": nice_name,
        "in_r2_auc_mean": np.mean(in_r2_auc_list),
        "in_r2_auc_stdev": np.std(in_r2_auc_list),
        "ex_r2_auc_mean": np.mean(ex_r2_auc_list),
        "ex_r2_auc_stdev": np.std(ex_r2_auc_list),
        "failures_mean": np.mean(failure_list),
        "failures_stdev": np.std(failure_list),
    }
    r2_auc_table[table_key] = table_columns

In [None]:
# Reference LaTeX table comparing numerical metrics reported in the literature.
# Written as a raw string so the LaTeX backslashes are kept literally; in a
# normal string "\b" (in \begin) is a backspace, "\t" (in \textbf) is a tab and
# "\\" collapses to a single backslash, which corrupts the LaTeX source.
r"""
Reference & NMSE & $R^2 > 0.99$ & $R^2 > 0.95$ & np.isclose  \\ \hline

\begin{table}[h]
\center{
\begin{tabular}{l | c | c | c | c | }
Reference & NMSE & $R^2 > 0.99$ & $R^2 > 0.95$ & np.isclose  \\ \hline

\cite{biggio2021} & 0.786 & 0.29 & 0.35 & \textbf{0.19}  \\ \hline
\cite{valipour2021} & \textbf{7.97} & 0.37 & 0.46 & 0.07  \\ \hline
\cite{vastl2022} & Tyrell - 10/4 & 0.18 & 0.23 & 0.14   \\ \hline
\cite{kamienny2022} & N/A & N/A & N/A  & N/A  \\ \hline
\end{tabular}
}
\label{table:numerical}
\caption{Comparison of numerical metrics from the literature}
\end{table}
"""

In [None]:
# Display the list of result file names that were selected for analysis.
active_dir

In [None]:
"""
This section builds a LaTeX-formatted table for R^2 AUC
"""

# One table is printed per benchmark tag. Every entry of r2_auc_table whose
# file name (key[0], the root part of the splitext tuple) contains the tag
# contributes one row. All backslashes are escaped explicitly so no string
# relies on invalid escape sequences such as "\p" or "\e".

# Column headings, followed by a rule and an empty spacer row.
auc_header_row = (
    "\\bf{Method} & \\bf{$R^2 AUC_{i.d.} \\pm$ std. dev.} & \\bf{$R^2 AUC_{o.o.d.} \\pm$ std. dev.}  \\\\ \n"
    "\\hline \\\\ \n"
)

for table_tag in ["nguyen", "ab_complex"]:

    latex_table = "\\begin{table}[h]\n"
    latex_table += "\\center{ \n"
    latex_table += "\\begin{tabular}{l | c | c |  }\n"
    # The header is written unconditionally, so it is present even when no
    # result matches the tag.
    latex_table += auc_header_row

    for key, stats in r2_auc_table.items():
        if table_tag not in key[0]:
            continue

        # Each row ends with a newline so rows are not run together on one line.
        latex_table += (
            f"{stats['nice_name']} &"
            f" {stats['in_r2_auc_mean']:.2e} $\\pm$ {stats['in_r2_auc_stdev']:.2e} &"
            f" {stats['ex_r2_auc_mean']:.2e} $\\pm$ {stats['ex_r2_auc_stdev']:.2e}  \\\\ \n"
        )

    # Any tag that is not "nguyen" is captioned as the A.B. benchmark.
    bm_name = "Nguyen" if "nguyen" in table_tag else "A.B."

    latex_table += "\\end{tabular}\n"
    latex_table += "}\n"
    latex_table += (
        "\\caption{$R^2$ over threshold area under the curve (AUC) on "
        f"{bm_name} equations}}\n"
    )
    latex_table += "\\end{table}\n"

    print(latex_table)

In [None]:
"""
This section builds a LaTeX-formatted table of failure rates
"""

# One table is printed per benchmark tag. Every entry of r2_auc_table whose
# file name (key[0], the root part of the splitext tuple) contains the tag
# contributes one row. All backslashes are escaped explicitly so no string
# relies on invalid escape sequences such as "\p" or "\e".

# Benchmark tag -> name used in the table caption.
failure_table_benchmarks = {"nguyen": "Nguyen", "ab_complex": "A.B."}

# Column headings, followed by a rule and an empty spacer row.
failure_header_row = (
    "Method & Failure rate $\\pm$ standard deviation   \\\\ \n"
    "\\hline \\\\ \n"
)

for table_tag, bm_name in failure_table_benchmarks.items():

    latex_table = "\\begin{table}[h]\n"
    latex_table += "\\center{ \n"
    latex_table += "\\begin{tabular}{l | c | }\n"
    # The header is written unconditionally, so it is present even when no
    # result matches the tag.
    latex_table += failure_header_row

    for key, stats in r2_auc_table.items():
        if table_tag not in key[0]:
            continue

        # Rows use $\pm$ to match the column heading (previously "+/-").
        latex_table += (
            f"{stats['nice_name']} &"
            f" {stats['failures_mean']:.2e} $\\pm$ {stats['failures_stdev']:.2e} \\\\ \n"
        )

    latex_table += "\\end{tabular}\n"
    latex_table += "}\n"
    latex_table += f"\\caption{{Failure rates on {bm_name} equations}}\n"
    latex_table += "\\end{table}\n"

    print(latex_table)