In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt

from symr.helpers import r2_over_threshold, r2_auc, plot_r2_over_threshold

import pandas as pd
import sympy as sp
import math

In [None]:
# Run selection: edit the tags and the results folder to load a different set of CSVs.
# A result file is picked up only when its name contains every tag listed here.
string_tag = ["860200"]

# Folder holding the result CSVs, resolved relative to the notebook's parent directory.
root_path = ".."
results_path = os.path.join(
    root_path,
    "results",
    "wwo_bfgs",
)

In [None]:
# Draft LaTeX for the method-accuracy table (label `table:method_accuracy`), kept in the
# notebook as plain text.
# NOTE(review): the "Tyrell - 10/4" cells look like unfinished placeholders - TODO confirm.
#
# The literal is a raw string on purpose: in a normal string "\b" (from "\begin") is a
# backspace, "\t" (from "\textit") is a tab and "\\" collapses to a single backslash, so the
# stored text would no longer be valid LaTeX.
reference_table_tex = r"""
\begin{table}[h]
\center{
\begin{tabular}{|l || c | c || c | c  |} \hline
          &  \multicolumn{2}{|c||}{NMSE} &  \multicolumn{2}{|c|}{$R^2$} \\ \hline
          &  BFGS      & no BFGS      & BFGS        & no BFGS  \\ \hline \hline

\cite{biggio2021}   & 0.786 & Tyrell - 10/4 & -19.09 & -2.00E+65 \\ \hline
%\cite{biggio2020}   &  &  &  &   \\ \hline
\cite{valipour2021}   & 7.97 & 15.63 &  0.561 & -3.827  \\ \hline
\cite{vastl2022} & Tyrell - 10/4 & Tyrell - 10/4 & -0.56 &  \textit{-2.83}  \\ \hline
\cite{kamienny2022} & N/A & N/A & 0.68 &  0.62 \\ \hline
\end{tabular}
}
\label{table:method_accuracy}
\caption{Average results over all benchmark equations for two different numeric evaluations metrics, with and without optimization techniques.
}
\end{table}
"""

# last expression of the cell, so the notebook still displays the string
reference_table_tex

In [None]:
# Two LaTeX NMSE tables (A.B. and Nguyen equations) stored as plain text.
# NOTE(review): these look like pasted output of the NMSE table cell further down, possibly
# edited by hand - TODO confirm before relying on the numbers.
#
# Both literals are raw strings: in a normal string "\b" (from "\begin") is a backspace,
# "\\" collapses to one backslash, and "\p", "\h", "\c", "\e", "\m" are invalid escape
# sequences that newer Python versions warn about.
ab_nmse_table_tex = r"""


\begin{table}[h] 
\center{ 
\begin{tabular}{|l || c | c ||} \hline 
&  \multicolumn{2}{|c||}{NMSE}  \\ 
&  BFGS      & no BFGS        \\ \hline \hline 
NSRTS (median) & 7.68e-02 $\pm$ 1.353e+14 &  3.39e-01 $\pm$ 1.226e+40  \\ \hline 
SymGPT (median) & 8.97e-01 $\pm$ 2.400e+08 &  3.55e+01 $\pm$ 1.092e+10 \\  \hline
Symformer (median) & 6.46e-01 $\pm$ 3.765e+03 &  5.69e-01 $\pm$ 6.400e+01  \\ \hline  \hline 

    \end{tabular}

    }
    \caption{
    Effect of post-inference BFGS interpolation numerical metrics for A.B. equations 
    }
    \end{table}
"""

nguyen_nmse_table_tex = r"""
\begin{table}[h] 
\center{ 
\begin{tabular}{|l || c | c ||} \hline 
&  \multicolumn{2}{|c||}{NMSE}  \\ 
&  BFGS      & no BFGS        \\ \hline \hline 
Symformer (median) & 1.14e+00 $\pm$ 3.372e+04 &  1.56e+00 $\pm$ 2.144e+04 
 \\ \hline  \hline 
SymGPT (median) & 3.78e-01 $\pm$ inf &  4.76e+01 $\pm$ inf 
 \\ \hline  \hline 
NSRTS (median) & 2.82e-01 $\pm$ 3.824e+41 &  1.07e+00 $\pm$ inf 
 \\ \hline  \hline 

    \end{tabular}

    }
    \caption{
    Effect of post-inference BFGS interpolation numerical metrics for Nguyen equations 
    }
    \end{table}
    
"""

# last expression of the cell, so the notebook still displays the final table string
nguyen_nmse_table_tex

In [None]:
# interpolation version of table 2 for nmse
#
# Prints one LaTeX table per benchmark ("nguyen", "ab_complex") with the median and the
# standard deviation of the " in_nmse" column for every method, split into runs with and
# without BFGS. Reads `results_path` and `string_tag` from the configuration cell.

list_dir = os.listdir(results_path)

# not used below; kept so the set of names this cell defines stays the same
numerical_table = {}

# only files whose name contains every tag in `string_tag` take part
tagged_files = [name for name in list_dir if all(str_tag in name for str_tag in string_tag)]

# Result files come in pairs whose names differ only by "b0" / "b1" (presumably the runs
# without / with BFGS - the rows themselves are split on the " use_bfgs" column below).
# `active_dir` keeps the first file of each pair in listing order and `partner_of` maps it
# to its counterpart. Building new containers avoids removing entries from a list while it
# is being iterated, and a file without a usable counterpart is reported and skipped
# instead of raising ValueError or silently reusing a stale counterpart name.
active_dir = []
partner_of = {}
claimed = set()
for filename in tagged_files:
    if filename in claimed:
        continue  # already covered as the counterpart of an earlier file
    if "b0" in filename:
        other_filename = filename.replace("b0", "b1")
    elif "b1" in filename:
        other_filename = filename.replace("b1", "b0")
    else:
        print(f"skipping {filename}: neither 'b0' nor 'b1' in its name")
        continue
    if other_filename not in tagged_files:
        print(f"skipping {filename}: counterpart {other_filename} not found")
        continue
    claimed.update((filename, other_filename))
    active_dir.append(filename)
    partner_of[filename] = other_filename
    print(filename, other_filename)

# file-name tag of each benchmark -> benchmark name used in the caption
benchmark_names = {"nguyen": "Nguyen", "ab_complex": "A.B."}

# " None" entries seen over every method and file; reported once at the end
none_r2_total = 0
none_nmse_total = 0

for table_tag, bm_name in benchmark_names.items():

    latex_table = "\\begin{table}[h] \n"
    latex_table += "\\center{ \n"
    latex_table += "\\begin{tabular}{|l || c | c ||} \\hline \n"
    latex_table += "&  \\multicolumn{2}{|c||}{NMSE}  \\\\ \n"
    latex_table += "&  BFGS      & no BFGS        \\\\ \\hline \\hline \n"

    for filename in active_dir:

        # check the benchmark before reading anything from disk
        if table_tag not in filename:
            continue

        df_a = pd.read_csv(os.path.join(results_path, filename))
        df_b = pd.read_csv(os.path.join(results_path, partner_of[filename]))
        df = pd.concat([df_a, df_b])

        # the first distinct value of the "method" column is left out of the table
        methods = df["method"].unique()[1:]

        for method in methods:

            rows = df.loc[df["method"] == method]

            in_success = " False" == rows["failed"].to_numpy()
            # nmse
            in_nmse_raw = rows[" in_nmse"]
            # r2
            in_r2_raw = rows["in_r2"]
            in_nmse_bfgs = rows[" use_bfgs"].to_numpy(dtype=float)

            none_nmse_total += int(np.sum(" None" == in_nmse_raw))
            none_r2_total += int(np.sum(" None" == in_r2_raw))

            # a run with a missing metric is treated as a failed run
            in_success[" None" == in_nmse_raw] = 0
            in_success[" None" == in_r2_raw] = 0

            # successful runs with use_bfgs == 0 (b0) and use_bfgs == 1 (b1)
            in_nmse_b0 = in_nmse_raw[1.0 == (in_success * (1 - in_nmse_bfgs))].to_numpy(dtype=float)
            in_nmse_b1 = in_nmse_raw[1.0 == (in_success * in_nmse_bfgs)].to_numpy(dtype=float)

            # filter out non-finite values
            in_nmse_b0 = in_nmse_b0[np.isfinite(in_nmse_b0)]
            in_nmse_b1 = in_nmse_b1[np.isfinite(in_nmse_b1)]

            # backslashes are escaped explicitly; the emitted LaTeX is unchanged
            table_row = f"{method} (median) & {np.median(in_nmse_b1):.2e} $\\pm$ {np.std(in_nmse_b1):.3e} & "
            table_row += f" {np.median(in_nmse_b0):.2e} $\\pm$ {np.std(in_nmse_b0):.3e} "
            table_row += " \\\\ \\hline  \\hline \n"

            latex_table += table_row

    latex_table += "\n    \\end{tabular}\n\n    }\n    \\caption{\n    "
    latex_table += f"Effect of post-inference BFGS interpolation numerical metrics for {bm_name} equations "
    latex_table += "\n    }\n    \\end{table}\n    "

    print(latex_table)

if none_r2_total or none_nmse_total:
    print("Nones (failures) in metrics:", none_r2_total, none_nmse_total)
                
     

In [None]:
# interpolation version of table 2 for R^2
#
# Prints one LaTeX table per benchmark ("nguyen", "ab_complex") with the median and the
# standard deviation of the "in_r2" column for every method, split into runs with and
# without BFGS. Reads `results_path` and `string_tag` from the configuration cell.

list_dir = os.listdir(results_path)

# not used below; kept so the set of names this cell defines stays the same
numerical_table = {}

# only files whose name contains every tag in `string_tag` take part
tagged_files = [name for name in list_dir if all(str_tag in name for str_tag in string_tag)]

# Result files come in pairs whose names differ only by "b0" / "b1" (presumably the runs
# without / with BFGS - the rows themselves are split on the " use_bfgs" column below).
# `active_dir` keeps the first file of each pair in listing order and `partner_of` maps it
# to its counterpart. Building new containers avoids removing entries from a list while it
# is being iterated, and a file without a usable counterpart is reported and skipped
# instead of raising ValueError or silently reusing a stale counterpart name.
active_dir = []
partner_of = {}
claimed = set()
for filename in tagged_files:
    if filename in claimed:
        continue  # already covered as the counterpart of an earlier file
    if "b0" in filename:
        other_filename = filename.replace("b0", "b1")
    elif "b1" in filename:
        other_filename = filename.replace("b1", "b0")
    else:
        print(f"skipping {filename}: neither 'b0' nor 'b1' in its name")
        continue
    if other_filename not in tagged_files:
        print(f"skipping {filename}: counterpart {other_filename} not found")
        continue
    claimed.update((filename, other_filename))
    active_dir.append(filename)
    partner_of[filename] = other_filename
    print(filename, other_filename)

# file-name tag of each benchmark -> benchmark name used in the caption
benchmark_names = {"nguyen": "Nguyen", "ab_complex": "A.B."}

# " None" entries seen over every method and file; reported once at the end.
# Both counters are filled in this cell so the report does not depend on variables
# left behind by another cell.
none_r2_total = 0
none_nmse_total = 0

for table_tag, bm_name in benchmark_names.items():

    latex_table = "\\begin{table}[h] \n"
    latex_table += "\\center{ \n"
    latex_table += "\\begin{tabular}{|l || c | c ||} \\hline \n"
    latex_table += "&  \\multicolumn{2}{|c||}{$R^2$}  \\\\ \n"
    latex_table += "&  BFGS      & no BFGS        \\\\ \\hline \\hline \n"

    for filename in active_dir:

        # check the benchmark before reading anything from disk
        if table_tag not in filename:
            continue

        df_a = pd.read_csv(os.path.join(results_path, filename))
        df_b = pd.read_csv(os.path.join(results_path, partner_of[filename]))
        df = pd.concat([df_a, df_b])

        # the first distinct value of the "method" column is left out of the table
        methods = df["method"].unique()[1:]

        for method in methods:

            rows = df.loc[df["method"] == method]

            in_success = " False" == rows["failed"].to_numpy()
            # r2
            in_r2_raw = rows["in_r2"]
            in_r2_bfgs = rows[" use_bfgs"].to_numpy(dtype=float)

            none_r2_total += int(np.sum(" None" == in_r2_raw))
            # counted for the final report only; NMSE does not affect this table
            none_nmse_total += int(np.sum(" None" == rows[" in_nmse"]))

            # a run with a missing R^2 is treated as a failed run
            in_success[" None" == in_r2_raw] = 0

            # successful runs with use_bfgs == 0 (b0) and use_bfgs == 1 (b1)
            in_r2_b0 = in_r2_raw[1.0 == (in_success * (1 - in_r2_bfgs))].to_numpy(dtype=float)
            in_r2_b1 = in_r2_raw[1.0 == (in_success * in_r2_bfgs)].to_numpy(dtype=float)

            # filter out non-finite values
            in_r2_b0 = in_r2_b0[np.isfinite(in_r2_b0)]
            in_r2_b1 = in_r2_b1[np.isfinite(in_r2_b1)]

            # backslashes are escaped explicitly; the emitted LaTeX is unchanged
            table_row = f"{method} (median) & {np.median(in_r2_b1):.2e} $\\pm$ {np.std(in_r2_b1):.3e} & "
            table_row += f" {np.median(in_r2_b0):.2e} $\\pm$ {np.std(in_r2_b0):.3e} "
            table_row += " \\\\ \\hline  \\hline \n"

            latex_table += table_row

    latex_table += "\n    \\end{tabular}\n\n    }\n    \\caption{\n    "
    latex_table += f"Effect of post-inference BFGS interpolation numerical metrics for {bm_name} equations "
    latex_table += "\n    }\n    \\end{table}\n    "

    print(latex_table)

if none_r2_total or none_nmse_total:
    print("Nones (failures) in metrics:", none_r2_total, none_nmse_total)
                
     

In [None]:
# interpolation version of table 1
#
# Prints one LaTeX table per benchmark ("nguyen", "ab_complex") with the median of the
# " in_nmse" and "in_r2" columns for every method, split into runs with and without BFGS.
# Reads `results_path` and `string_tag` from the configuration cell.

list_dir = os.listdir(results_path)

# not used below; kept so the set of names this cell defines stays the same
numerical_table = {}

# only files whose name contains every tag in `string_tag` take part
tagged_files = [name for name in list_dir if all(str_tag in name for str_tag in string_tag)]

# Result files come in pairs whose names differ only by "b0" / "b1" (presumably the runs
# without / with BFGS - the rows themselves are split on the " use_bfgs" column below).
# `active_dir` keeps the first file of each pair in listing order and `partner_of` maps it
# to its counterpart. Building new containers avoids removing entries from a list while it
# is being iterated, and a file without a usable counterpart is reported and skipped
# instead of raising ValueError or silently reusing a stale counterpart name.
active_dir = []
partner_of = {}
claimed = set()
for filename in tagged_files:
    if filename in claimed:
        continue  # already covered as the counterpart of an earlier file
    if "b0" in filename:
        other_filename = filename.replace("b0", "b1")
    elif "b1" in filename:
        other_filename = filename.replace("b1", "b0")
    else:
        print(f"skipping {filename}: neither 'b0' nor 'b1' in its name")
        continue
    if other_filename not in tagged_files:
        print(f"skipping {filename}: counterpart {other_filename} not found")
        continue
    claimed.update((filename, other_filename))
    active_dir.append(filename)
    partner_of[filename] = other_filename
    print(filename, other_filename)

# file-name tag of each benchmark -> benchmark name used in the caption
benchmark_names = {"nguyen": "Nguyen", "ab_complex": "A.B."}

# " None" entries seen over every method and file; reported once at the end
none_r2_total = 0
none_nmse_total = 0

for table_tag, bm_name in benchmark_names.items():

    latex_table = "\\begin{table}[h] \n"
    latex_table += "\\center{ \n"
    latex_table += "\\begin{tabular}{|l || c | c || c | c  |} \\hline \n"
    latex_table += "&  \\multicolumn{2}{|c||}{NMSE} &  \\multicolumn{2}{|c|}{$R^2$} \\\\ \\hline \n"
    latex_table += "&  BFGS      & no BFGS      & BFGS        & no BFGS  \\\\ \\hline \\hline \n"

    for filename in active_dir:

        # check the benchmark before reading anything from disk
        if table_tag not in filename:
            continue

        df_a = pd.read_csv(os.path.join(results_path, filename))
        df_b = pd.read_csv(os.path.join(results_path, partner_of[filename]))
        df = pd.concat([df_a, df_b])

        # the first distinct value of the "method" column is left out of the table
        methods = df["method"].unique()[1:]

        for method in methods:

            rows = df.loc[df["method"] == method]

            in_success = " False" == rows["failed"].to_numpy()
            # nmse
            in_nmse_raw = rows[" in_nmse"]
            # r2
            in_r2_raw = rows["in_r2"]
            in_nmse_bfgs = rows[" use_bfgs"].to_numpy(dtype=float)

            none_nmse_total += int(np.sum(" None" == in_nmse_raw))
            none_r2_total += int(np.sum(" None" == in_r2_raw))

            # a run with either metric missing is treated as a failed run
            in_success[" None" == in_nmse_raw] = 0
            in_success[" None" == in_r2_raw] = 0

            # successful runs with use_bfgs == 0 (b0) and use_bfgs == 1 (b1)
            runs_b0 = 1.0 == (in_success * (1 - in_nmse_bfgs))
            runs_b1 = 1.0 == (in_success * in_nmse_bfgs)

            in_nmse_b0 = in_nmse_raw[runs_b0].to_numpy(dtype=float)
            in_nmse_b1 = in_nmse_raw[runs_b1].to_numpy(dtype=float)
            in_r2_b0 = in_r2_raw[runs_b0].to_numpy(dtype=float)
            in_r2_b1 = in_r2_raw[runs_b1].to_numpy(dtype=float)

            # filter out non-finite values
            in_nmse_b0 = in_nmse_b0[np.isfinite(in_nmse_b0)]
            in_nmse_b1 = in_nmse_b1[np.isfinite(in_nmse_b1)]
            in_r2_b0 = in_r2_b0[np.isfinite(in_r2_b0)]
            in_r2_b1 = in_r2_b1[np.isfinite(in_r2_b1)]

            table_row = f"{method} (median) & {np.median(in_nmse_b1):.2e}& "
            table_row += f" {np.median(in_nmse_b0):.2e}  &"

            table_row += f"{np.median(in_r2_b1):.2e} &"
            table_row += f" {np.median(in_r2_b0):.2e}  "

            # backslashes are escaped explicitly; the emitted LaTeX is unchanged
            table_row += " \\\\ \\hline  \\hline \n"

            latex_table += table_row

    latex_table += "\n    \\end{tabular}\n\n    }\n    \\caption{\n    "
    latex_table += f"Effect of post-inference BFGS interpolation numerical metrics for {bm_name} equations "
    latex_table += "\n    }\n    \\end{table}\n    "

    print(latex_table)

if none_r2_total or none_nmse_total:
    print("Nones (failures) in metrics:", none_r2_total, none_nmse_total)
                
     