In [2]:
import os

import numpy as np
import matplotlib.pyplot as plt

from symr.helpers import r2_over_threshold, r2_auc, plot_r2_over_threshold
from symr.metrics import compute_tree_traversal

import pandas as pd
import sympy as sp
import math

In [3]:
# modify the results path and string tag to load different csvs
root_path = ".."
results_path = os.path.join(root_path, "results", "wwo_bfgs")
string_tag = ["860200"]

In [None]:
"""

\begin{table}[h]
\center{
\begin{tabular}{l | c | c | c | c  }
Reference & Cross-entropy & Symb.\ Equiv.\ & Tree Edit Dist.\  \\ \hline
Random & 1.79 & 0 &   43.29  \\ \hline
Fourier & 3.46 & 0 &   161.6  \\ \hline
Poly & 3.66 & 0 &  60.03   \\ \hline
\cite{biggio2021}    & 1.51 & 0.081 &  11.86   \\ \hline
\cite{valipour2021}   & 1.96 & 0.065 &  1.80   \\ \hline
\cite{vastl2022} &  Tyrell - 9/26  & Tyrell - 9/26 &  Tyrell - 9/26   \\ \hline
\cite{kamienny2022}   & N/A & N/A &  N/A   \\ \hline
\end{tabular}
}
\label{table:symbolic}
\caption{A comparison of symbolic metrics on networks from literature}
\end{table}

"""

In [5]:
df.columns

Index(['method', ' use_bfgs', ' expression', ' predicted', ' trial', ' k_fold',
       ' tree_distance', ' exact', 'in_nmae', ' in_nmse', 'in_r2',
       ' in_r2_cuttoff', ' in_r2_over_95', ' in_r2_over_99', ' in_r2_over_999',
       ' in_isclose', 'ex_nmae', ' ex_nmse', 'ex_r2', ' ex_r2_cuttoff',
       ' ex_r2_over_95', ' ex_r2_over_99', ' ex_r2_over_999', ' ex_isclose',
       'failed', ' time_elapsed', ' git_hash', ' entry_point'],
      dtype='object')

In [29]:
# interpolation version of table 2 for nmse

list_dir = os.listdir(results_path)

active_dir = []

numerical_table = {}

for filename in list_dir:
    toss = 0
    for str_tag in string_tag:
        if str_tag not in filename:
            toss += 1
    if not(toss):
        active_dir.append(filename)
        
        print(filename, toss)

latex_table = "\\begin{table}[h] \n"
latex_table += "\\center{ \n"
latex_table += "\\begin{tabular}{l | c | } \n"
latex_table += "Method & Normalized Equation Length  \\\\ \\hline \n"
 
results = {"SymGPT": {},\
          "Symformer": {},\
          "NSRTS": {},\
          "PySR": {}}
 
for filename in active_dir:

    filepath = os.path.join(results_path, filename)

    df = pd.read_csv(filepath)
    methods = df["method"].unique()[1:]

    for method in methods:

        in_success = " False" == df.loc[df["method"] == method]["failed"].to_numpy()
        
        expressions = df.loc[df["method"] == method][" expression"][in_success].to_list()
        equivalence = list(" True" == df.loc[df["method"] == method][" exact"][in_success].to_numpy())
        tree_distance = list(df.loc[df["method"] == method][" tree_distance"][in_success].to_numpy(dtype=float))
        
        if "equivalence" in results[method].keys():
            
            results[method]["expressions"].extend(expressions)
            results[method]["equivalence"].extend(equivalence)
            results[method]["tree_distance"].extend(tree_distance)
        else:      
            results[method]["expressions"] =  expressions
            results[method]["equivalence"] =  equivalence
            results[method]["tree_distance"] = tree_distance
            


NSRTS_bmab_complex_b0_z30_k3_t100_d10_860200.csv 0
NSRTS_bmab_complex_b1_z30_k3_t100_d10_860200.csv 0
SymGPT_bmab_complex_b0_z30_k3_t100_d10_860200.csv 0
Symformer_r12_bmnguyen_b1_z30_k3_t100_d10_860200_1_partial.csv 0
Symformer_bmab_complex_b0_z30_k3_t100_d10_860200_partial.csv 0
SymGPT_bmab_complex_b1_z30_k3_t100_d10_860200.csv 0
SymGPT_r12_bmnguyen_b1_z30_k3_t100_d10_860200.csv 0
SymGPT_r12_bmnguyen_b0_z30_k3_t100_d10_860200.csv 0
PySR_bmnguyen_b1_z30_k3_t100_d10_860200.csv 0
NSRTS_bmnguyen_b0_z30_k3_t100_d10_860200.csv 0
Symformer_bmab_complex_b1_z30_k3_t100_d10_860200_partial.csv 0
PySR_bmab_complex_b1_z30_k3_t100_d10_860200.csv 0
NSRTS_bmnguyen_b1_z30_k3_t100_d10_860200.csv 0
Symformer_r12_bmnguyen_b0_z30_k3_t100_d10_860200_1_partial.csv 0


In [34]:

latex_table = "\\begin{table}[h] \n"
latex_table += "\\center{ \n"
latex_table += "\\begin{tabular}{l | c | c |} \n"
latex_table += "Method & Exact Equivalence & Tree Distance  \\\\ \\hline \n"
 
        
for method in results.keys():
    
    mean_equivalence = np.mean(results[method]["equivalence"])
    mean_dist = np.mean(results[method]["tree_distance"])
                                   
    std_dev_dist = np.std(results[method]["tree_distance"])
        
    table_row = f"{method}  &  {mean_equivalence:.2f} & {mean_dist:.2f} $\pm$ {std_dev_dist:.2f} \\\\ \n"
    
    latex_table += table_row



latex_table += """
\end{tabular}\n
}

\caption{Exact symbolic equivalance and tree edit distance. Distance values are mean $\pm$ standard deviation.}
    
\end{table}
"""

print(latex_table)

             
    
# this version normalizes the tree edit distance to equation tree traversal length

latex_table = "\\begin{table}[h] \n"
latex_table += "\\center{ \n"
latex_table += "\\begin{tabular}{l | c | c |} \n"
latex_table += "Method & Exact Equivalence & Tree Distance  \\\\ \\hline \n"
 
        
for method in results.keys():
    
    lengths = [compute_tree_traversal(elem) for elem in results[method]["expressions"]]
    
    mean_equivalence = np.mean(results[method]["equivalence"])
    
    tree_distance = [elem1/elem2 for elem1, elem2 in zip(results[method]["tree_distance"], lengths)]
    
    mean_dist = np.mean(lengths)
                                   
    std_dev_dist = np.std(lengths)
        
    table_row = f"{method}  &  {mean_equivalence:.2f} & {mean_dist:.2f} $\pm$ {std_dev_dist:.2f} \\\\ \n"
    
    latex_table += table_row



latex_table += """
\end{tabular}\n
}

\caption{Exact symbolic equivalance and tree edit distance. Distance values are normalized to the tree traversal length of the target expression, mean $\pm$ standard deviation.}
    
\end{table}
"""

print(latex_table)

                
     
    
     

\begin{table}[h] 
\center{ 
\begin{tabular}{l | c | c |} 
Method & Exact Equivalence & Tree Distance  \\ \hline 
SymGPT  &  0.00 & 27.90 $\pm$ 10.91 \\ 
Symformer  &  0.12 & 17.54 $\pm$ 13.36 \\ 
NSRTS  &  0.16 & 10.99 $\pm$ 6.95 \\ 
PySR  &  0.25 & 12.73 $\pm$ 10.33 \\ 

\end{tabular}

}

\caption{Exact symbolic equivalance and tree edit distance. Distance values are mean $\pm$ standard deviation.}
    
\end{table}

\begin{table}[h] 
\center{ 
\begin{tabular}{l | c | c |} 
Method & Exact Equivalence & Tree Distance  \\ \hline 
SymGPT  &  0.00 & 11.99 $\pm$ 4.40 \\ 
Symformer  &  0.12 & 10.34 $\pm$ 4.77 \\ 
NSRTS  &  0.16 & 10.09 $\pm$ 4.51 \\ 
PySR  &  0.25 & 10.10 $\pm$ 4.50 \\ 

\end{tabular}

}

\caption{Exact symbolic equivalance and tree edit distance. Distance values are normalized to the tree traversal length of the target expression, mean $\pm$ standard deviation.}
    
\end{table}

