In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt

from symr.helpers import r2_over_threshold, r2_auc, plot_r2_over_threshold
from symr.metrics import compute_tree_traversal

import pandas as pd
import sympy as sp
import math

In [None]:
# Settings that select which result CSVs are loaded. Change the results folder
# or the tags to load a different set; a file is picked up only when its name
# contains every tag listed in `string_tag`.
root_path = ".."
string_tag = ["860200"]
results_path = os.path.join(root_path, "results", "wwo_bfgs")

In [None]:
"""
\begin{table}[h]
\center{
\begin{tabular}{l | c | c |  }
Reference & Target & Predicted  \\ \hline

\cite{biggio2021} & 21.95 &  28.49   \\ \hline
%\cite{biggio2020} &  &  &     \\ \hline
\cite{valipour2021} & 44.29 & 33.06    \\ \hline
\cite{vastl2022} & \textit{20.64} &   \textit{55.30}   \\ \hline
\cite{kamienny2022} & N/A &  N/A  \\ \hline
\end{tabular}
}
\label{table:length}
\caption{Comparison of equation length for equations predicted by networks}
\end{table}
"""

In [None]:
# Quick look at the column names of the most recently loaded results CSV.
# `df` is only assigned by the loading cell further down, so on a fresh kernel
# (Restart & Run All) an unguarded `df.columns` raises NameError and stops the
# run. The guard makes this cell a no-op until `df` exists.
df.columns if "df" in globals() else None

In [None]:
# Collect target/predicted expression pairs per method from every results file
# whose name contains all tags in `string_tag`. Only rows whose "failed" value
# is " False" are kept.

# Sorted so the load order (and the `df` left behind by the last file) does not
# depend on the arbitrary order returned by os.listdir.
list_dir = sorted(os.listdir(results_path))

active_dir = []

# NOTE(review): not used anywhere in this cell; kept so the set of names the
# cell defines is unchanged.
numerical_table = {}

for filename in list_dir:
    # Number of required tags that are missing from this filename.
    toss = sum(str_tag not in filename for str_tag in string_tag)
    if toss == 0:
        active_dir.append(filename)

        print(filename, toss)

# Methods expected in the results files. A method that appears in a CSV but is
# not listed here is added on the fly below instead of raising KeyError.
results = {"SymGPT": {},
           "Symformer": {},
           "NSRTS": {},
           "PySR": {}}

for filename in active_dir:

    filepath = os.path.join(results_path, filename)

    df = pd.read_csv(filepath)
    # NOTE(review): the first unique value of "method" is skipped; presumably it
    # is not one of the compared methods -- confirm against the CSV layout.
    methods = df["method"].unique()[1:]

    for method in methods:

        # Filter the rows of this method once and reuse the subset.
        method_rows = df.loc[df["method"] == method]

        # The flag is compared as the string " False" (with a leading space).
        # NOTE(review): "failed" has no leading space while " expression" and
        # " predicted" do -- verify the column names against the CSV header.
        in_success = method_rows["failed"].to_numpy() == " False"

        expressions = method_rows.loc[in_success, " expression"].to_list()
        predictions = method_rows.loc[in_success, " predicted"].to_list()

        # Accumulate across files; the lists are created on first use.
        method_results = results.setdefault(method, {})
        method_results.setdefault("expressions", []).extend(expressions)
        method_results.setdefault("predictions", []).extend(predictions)
            


In [None]:

# Build and print a LaTeX table of normalized equation length: for each method,
# the tree-traversal length of every prediction divided by that of its target,
# summarized as mean and standard deviation.
latex_table = "\\begin{table}[h] \n"
latex_table += "\\center{ \n"
latex_table += "\\begin{tabular}{l | c | } \n"
latex_table += "Method & Normalized Equation Length  \\\\ \\hline \n"

for method, collected in results.items():

    predictions = collected.get("predictions", [])
    expressions = collected.get("expressions", [])

    if not predictions:
        # A method pre-listed in `results` may have had no rows loaded; its
        # entry is then still empty and indexing it would raise KeyError.
        print(f"skipping {method}: no successful predictions loaded")
        continue

    # Predictions can contain a Unicode dash ("–") where a minus sign is
    # meant; it is normalized to "-" before measuring the expression.
    lengths = [
        compute_tree_traversal(predicted.replace("–", "-")) / compute_tree_traversal(target)
        for predicted, target in zip(predictions, expressions)
    ]
    mean_length = np.mean(lengths)
    std_dev_length = np.std(lengths)

    # Backslashes are escaped explicitly: "\p" is an invalid escape sequence.
    latex_table += f"{method}  & {mean_length:.2f} $\\pm$ {std_dev_length:.2f} \\\\ \n"

# Table footer, spelled out line by line so no invalid escapes ("\e", "\c",
# "\p") are needed and the emitted text stays the same as before.
latex_table += (
    "\n"
    "\\end{tabular}\n"
    "\n"
    "}\n"
    "\n"
    "\\caption{Normalized equation length in terms of predicted expression tree traversal, divided by target tree traversal length. Values are mean $\\pm$ standard deviation.}\n"
    "    \n"
    "\\end{table}\n"
)

print(latex_table)

                
     

In [None]:
# An en dash ("–", U+2013) is not a minus sign and will not work in sympy, so
# it is replaced by an ASCII "-" before the expression is handed to sympify.
# Uses the `sp` alias imported at the top of the notebook.
temp = "((((x/2)/sqrt(Abs(Abs(x)^(3/4) – 0.80229133)))))".replace("–", "-")
sp.sympify(temp)