In [2]:
import sys
sys.path.append('../../')

import numpy as np
import matplotlib.pyplot as plt
from gplearn.gplearn.genetic import SymbolicRegressor
from experiments.load_data import load_data
# from experiments.benchmarks import run_experiment, categorical_variables_per_dataset, create_categorical_variable_dict
from interpret.glassbox import ExplainableBoostingClassifier, ExplainableBoostingRegressor
from sklearn.linear_model import LinearRegression
from collections import defaultdict
import pandas as pd
from xgboost import XGBClassifier, XGBRegressor
import time

In [None]:
def get_model(model,task):
    """Build a fresh, default-configured estimator for a model name and task.

    Parameters
    ----------
    model : str
        One of 'xgb', 'ebm' or 'ebm_no_interactions'.
    task : str
        Either 'regression' or 'classification'.

    Returns
    -------
    A new estimator instance: XGBRegressor / XGBClassifier for 'xgb',
    ExplainableBoostingRegressor / ExplainableBoostingClassifier for 'ebm',
    and the same EBM classes constructed with ``interactions=0`` for
    'ebm_no_interactions'.

    Raises
    ------
    ValueError
        If the (model, task) combination is not one of the supported ones.
        Previously such a call silently returned None.
    """
    # Factories are wrapped in lambdas so only the requested estimator is
    # constructed, and a new instance is created on every call.
    factories = {
        ('xgb', 'regression'): lambda: XGBRegressor(),
        ('xgb', 'classification'): lambda: XGBClassifier(),
        ('ebm', 'regression'): lambda: ExplainableBoostingRegressor(),
        ('ebm', 'classification'): lambda: ExplainableBoostingClassifier(),
        ('ebm_no_interactions', 'regression'): lambda: ExplainableBoostingRegressor(interactions=0),
        ('ebm_no_interactions', 'classification'): lambda: ExplainableBoostingClassifier(interactions=0),
    }
    try:
        factory = factories[(model, task)]
    except KeyError:
        supported = ", ".join(f"{m}/{t}" for m, t in factories)
        raise ValueError(
            f"Unsupported model/task combination: model={model!r}, task={task!r}. "
            f"Supported combinations: {supported}"
        ) from None
    return factory()

In [None]:
# run_experiment is imported in this cell because the import in the top cell is
# commented out; without it this cell fails with NameError on a fresh kernel.
from experiments.benchmarks import run_experiment

task = 'regression'
dataset_names = [
    'feynman_I_6_2b',
    'feynman_I_8_14',
    'feynman_I_12_2',
    'feynman_I_12_11',
    'feynman_I_18_12',
    'feynman_I_29_16',
    'feynman_I_32_5',
    'feynman_I_40_1',
    'feynman_II_2_42'
]

global_seed = 0
model_names = ['xgb','ebm_no_interactions','ebm']

# First test all datasets
# for i, dataset_name in enumerate(dataset_names):
#     if i < 30:
#         continue
#     model = LinearRegression()
#     run_experiment(dataset_name, model, None, task, random_state=global_seed)

# Column-oriented accumulator: one 'dataset_name' column plus, for every model,
# '<model>_mean', '<model>_std' and '<model>_time' columns. Each dataset
# contributes exactly one entry to every column, so the lists stay aligned.
results = defaultdict(list)
for dataset_name in dataset_names:
    results['dataset_name'].append(dataset_name)
    for model_name in model_names:
        # A fresh estimator per (dataset, model) pair so no fitted state is reused.
        model = get_model(model_name, task)
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments the way time.time() differences can.
        started = time.perf_counter()
        # NOTE(review): the meaning of the third positional argument (None) is
        # defined by run_experiment and is not visible here - confirm.
        score_mean, score_std = run_experiment(dataset_name, model, None, task, random_state=global_seed)
        elapsed = time.perf_counter() - started
        results[f'{model_name}_mean'].append(score_mean)
        results[f'{model_name}_std'].append(score_std)
        results[f'{model_name}_time'].append(elapsed)

# NOTE(review): this frame is not written to disk here, while a later cell
# rebinds `df` from '../results/Table_2_results.csv' - presumably that file is
# a saved copy of this frame; confirm how it is produced.
df = pd.DataFrame(results)
    

    

In [3]:
# Load the stored benchmark scores from disk. This rebinds `df`, which is the
# frame passed to generate_latex_table further down.

df = pd.read_csv(filepath_or_buffer='../results/Table_2_results.csv')

In [4]:
def extract_equation_number(dataset_name):
    """Turn a dataset name into a dotted equation number.

    The text before the first underscore is dropped and the remaining
    underscore-separated pieces are joined with dots, e.g.
    'feynman_I_6_2b' -> 'I.6.2b'. A name without any underscore yields ''.
    """
    # str.split always returns at least one element, so the unpacking is safe.
    _prefix, *number_parts = dataset_name.split("_")
    return ".".join(number_parts)

In [8]:
def _format_std(value):
    """Format a standard deviation with three decimals, dropping a leading zero.

    0.010 -> '.010', but 1.234 -> '1.234' and 12.5 -> '12.500'. Only the
    literal '0' before the decimal point is removed; blindly slicing off the
    first character would corrupt any value that is not of the form '0.xxx'.
    """
    text = f"{value:.3f}"
    return text[1:] if text.startswith("0.") else text


def generate_latex_table(df,equations):
    """Render the benchmark scores as a LaTeX table (booktabs rules).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'dataset_name' column and, for each of the prefixes
        'ebm_no_interactions', 'ebm' and 'xgb', the columns '<prefix>_mean'
        and '<prefix>_std'. The input frame itself is not modified.
    equations : dict
        Maps dataset name -> LaTeX source of the equation (without the
        surrounding '$'). Its iteration order defines the row order, and only
        datasets listed here are included in the table.

    Returns
    -------
    str
        The complete ``table`` environment as a string. Each row shows the
        equation number, the equation, and 'mean (std)' for GAM (EBM without
        interactions), GA^2M (EBM) and XGB.

    Raises
    ------
    KeyError
        If a key of `equations` is not present in df['dataset_name'], or a
        required column is missing.
    """
    # set_index returns a new frame, so the caller's df keeps its columns.
    df = df.set_index('dataset_name')
    res = r"""\begin{table}[]
\begin{tabular}{lllll}
\toprule
Eq. Number & Equation & GAM & GA${^2}$M & XGB \\
\midrule
"""
    for equation, symbol in equations.items():
        row = df.loc[equation,:]
        eq_num = extract_equation_number(equation)
        gam_cell = f"{row['ebm_no_interactions_mean']: .3f} ({_format_std(row['ebm_no_interactions_std'])})"
        ga2m_cell = f"{row['ebm_mean']:.3f} ({_format_std(row['ebm_std'])})"
        xgb_cell = f"{row['xgb_mean']:.3f} ({_format_std(row['xgb_std'])})"
        # The row ends with the LaTeX line break '\\' followed by a newline.
        res += f"{eq_num} & ${symbol}$ & {gam_cell} &  {ga2m_cell} & {xgb_cell} \\\\ \n"
    res+= r"""\bottomrule
\end{tabular}
\end{table}"""
    return res
    
    

In [6]:
# LaTeX source (math mode, without the surrounding '$') for every equation shown
# in the table, keyed by dataset name. The insertion order is the row order.
equations_to_print = {
    'feynman_I_6_2b':r'f=e^{-\frac{(\theta-\theta_1)^2}{2\sigma^2}}/\sqrt{2\pi\sigma^2}',
    'feynman_I_8_14':r'd=\sqrt{(x_2-x_1)^2+(y_2-y_1)^2}',
    'feynman_I_12_2':r'F=\frac{q_1 q_2}{4 \pi \epsilon r^2}',
    'feynman_I_12_11':r'F=q(E_f+Bv\sin(\theta))',
    'feynman_I_18_12':r'\tau=rF\sin(\theta)',
    'feynman_I_29_16':r'x=\sqrt{x_1^2+x_2^2-2x_1 x_2 \cos(\theta_1 - \theta_2)}',
    'feynman_I_32_5':r'P=\frac{q^2 a^2}{6\pi\epsilon c^3}',
    # I.40.1 is n = n_0 exp(-m g x / (k_b T)); the exponent previously read
    # 'magx', which introduced a spurious factor 'a'.
    'feynman_I_40_1':r'n=n_0 e^{-\frac{mgx}{k_b T}}',
    'feynman_II_2_42':r'P=\frac{\kappa(T_2-T_1)A}{d}'
}
# print (rather than a bare expression) so the LaTeX is shown with real line
# breaks and single backslashes, ready to copy.
print(generate_latex_table(df, equations_to_print))

\begin{table}[]
\begin{tabular}{lllll}
\toprule
Eq. Num. & Equation & GAM & GA${^2}$M & XGB \\
\midrule
I.6.2b & $f=e^{-\frac{(\theta-\theta_1)^2}{2\sigma^2}}/\sqrt{2\pi\sigma^2}$ &  0.731 (.010) &  0.896 (.004) & 0.997 (.000) \\ 
I.8.14 & $d=\sqrt{(x_2-x_1)^2+(y_2-y_1)^2}$ &  0.229 (.011) &  0.966 (.000) & 0.989 (.000) \\ 
I.12.2 & $F=\frac{q_1 q_2}{4 \pi \epsilon r^2}$ &  0.676 (.011) &  0.950 (.003) & 0.993 (.001) \\ 
I.12.11 & $F=q(E_f+Bv\sin(\theta))$ &  0.675 (.004) &  0.955 (.001) & 0.996 (.000) \\ 
I.18.12 & $\tau=rF\sin(\theta)$ &  0.760 (.002) &  0.981 (.000) & 0.999 (.000) \\ 
I.29.16 & $x=\sqrt{x_1^2+x_2^2-2x_1 x_2 \cos(\theta_1 - \theta_2)}$ &  0.298 (.007) &  0.902 (.002) & 0.983 (.001) \\ 
I.32.5 & $P=\frac{q^2 a^2}{6\pi\epsilon c^3}$ &  0.444 (.015) &  0.835 (.009) & 0.988 (.001) \\ 
I.40.1 & $n=n_0 e^{-\frac{magx}{k_b T}}$ &  0.736 (.003) &  0.899 (.003) & 0.981 (.001) \\ 
II.2.42 & $P=\frac{\kappa(T_2-T_1)A}{d}$ &  0.615 (.006) &  0.937 (.002) & 0.990 (.000) \\ 
\bott

In [9]:
# Save the table to a file
from pathlib import Path

# Render first: opening the file in 'w' mode truncates it immediately, so
# rendering inside the `with` block would leave an empty file behind if
# generate_latex_table raised.
table_tex = generate_latex_table(df, equations_to_print)

table_path = Path('tables/Table_2.tex')
# The output directory may not exist yet (e.g. on a fresh checkout).
table_path.parent.mkdir(parents=True, exist_ok=True)
with open(table_path, 'w', encoding='utf-8') as f:
    f.write(table_tex)
    