In [1]:
import os
import json
import pandas as pd
import pyarrow.feather as feather
import numpy as np

In [2]:
def json_to_dataframe(path):
    """Collect every ``*.json`` file found under ``path`` into one DataFrame.

    The directory tree rooted at ``path`` is walked recursively and each JSON
    file contributes one row. When a record has a ``"symbolic_model"`` entry
    that is a list, the list is collapsed into a single string of the form
    ``"B0*<term0>+B1*<term1>+..."`` (terms are formatted with ``str``, so
    non-string terms are accepted).

    Directories and files are visited in sorted name order so the row order
    does not depend on the filesystem's listing order, and files are decoded
    as UTF-8 instead of the platform's default encoding.

    Parameters
    ----------
    path : str or os.PathLike
        Root directory to search.

    Returns
    -------
    pandas.DataFrame
        One row per JSON file; a ``"dataset"`` column, if present, is
        renamed to ``"dataset_name"``. Empty if no JSON file is found.
    """
    records = []

    for root, dirs, files in os.walk(path):
        # Sorting in place makes os.walk descend into subdirectories in a
        # deterministic order (top-down traversal honours this list).
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".json"):
                continue

            file_path = os.path.join(root, file)
            with open(file_path, "r", encoding="utf-8") as json_file:
                json_data = json.load(json_file)

            # A list-valued model is flattened to "B0*t0+B1*t1+...".
            if "symbolic_model" in json_data and isinstance(
                json_data["symbolic_model"], list
            ):
                json_data["symbolic_model"] = "+".join(
                    f"B{i}*{term}"
                    for i, term in enumerate(json_data["symbolic_model"])
                )

            records.append(json_data)

    # rename() returns a new frame; a missing "dataset" column is ignored.
    return pd.DataFrame(records).rename(columns={"dataset": "dataset_name"})

## BART Permutation

In [None]:
# Load every BART permutation result found under the results directory.
feynman_BART_perm = json_to_dataframe(
    os.path.normpath("../results_feynman/BART_perm")
)

# In each index column, swap the ["NA"] placeholder for an empty list and
# leave every other value untouched.
for _idx_col in ("idx_local", "idx_gmax", "idx_gse"):
    feynman_BART_perm[_idx_col] = feynman_BART_perm[_idx_col].apply(
        lambda selected: [] if selected == ["NA"] else selected
    )

# Write the cleaned table next to the raw results as a Feather file.
feather.write_feather(
    feynman_BART_perm, os.path.normpath("../results_feynman/feynman_BART_perm.feather")
)

## BART VIP Ranking

In [None]:
# Gather the BART VIP results into one table and write it out unchanged
# as a Feather file.
feynman_BART_VIP = json_to_dataframe(
    os.path.normpath("../results_feynman/BART_VIP")
)
feather.write_feather(
    feynman_BART_VIP, os.path.normpath("../results_feynman/feynman_BART_VIP_withidx.feather")
)

## SR

In [None]:
# Load the SR runs: one row per JSON result file.
feynman_SR = json_to_dataframe(
    os.path.normpath("../results_feynman/SR")
)

# Derived columns: time_time / 3600 (presumably seconds -> hours; confirm
# the unit of time_time) and the test R^2 floored at zero.
feynman_SR.loc[:, "training_time_hr"] = feynman_SR["time_time"] / 3600
feynman_SR["r2_zero_test"] = feynman_SR["r2_test"].apply(lambda r2: max(r2, 0))

# Normalize the algorithm labels. The substitutions are applied in a fixed
# order, each one operating on the output of the previous one.
feynman_SR["algorithm"] = feynman_SR["algorithm"].apply(
    lambda name: name.replace("Regressor", "")
    .replace("regressor", "")
    .replace("tuned.", "")
    .replace(".hclst_v2", "")
    .replace("sembackpropgp", "SBP-GP")
    .replace("FE_AFP", "AFP_FE")
    .replace("GPGOMEA", "GP-GOMEA")
)

# Rows without an r2_train value get NaN in every metric column.
metric_col = ["TP", "FP", "TN", "FN", "F1"]
feynman_SR.loc[feynman_SR["r2_train"].isna(), metric_col] = np.nan

feynman_SR

## PAN+SR

In [None]:
# Load the SR_BART_VIP runs: one row per JSON result file.
feynman_SR_BART = json_to_dataframe(
    os.path.normpath("../results_feynman/SR_BART_VIP")
)

# Derived columns: time_time / 3600 (presumably seconds -> hours; confirm
# the unit of time_time) and the test R^2 floored at zero.
feynman_SR_BART.loc[:, "training_time_hr"] = feynman_SR_BART["time_time"] / 3600
feynman_SR_BART["r2_zero_test"] = feynman_SR_BART["r2_test"].apply(
    lambda r2: max(r2, 0)
)

# Normalize the algorithm labels. The substitutions are applied in a fixed
# order, each one operating on the output of the previous one.
feynman_SR_BART["algorithm"] = feynman_SR_BART["algorithm"].apply(
    lambda name: name.replace("Regressor", "")
    .replace("regressor", "")
    .replace("tuned.", "")
    .replace(".hclst_v2", "")
    .replace("sembackpropgp", "SBP-GP")
    .replace("FE_AFP", "AFP_FE")
    .replace("GPGOMEA", "GP-GOMEA")
)

# Rows without an r2_train value get NaN in every metric column.
metric_col = ["TP", "FP", "TN", "FN", "F1"]
feynman_SR_BART.loc[feynman_SR_BART["r2_train"].isna(), metric_col] = np.nan

feynman_SR_BART

In [10]:
# Stack the SR and SR_BART tables row-wise, discarding the original indices.
feynman_results = pd.concat(
    [feynman_SR, feynman_SR_BART],
    ignore_index=True,
)
feynman_results  # not the cell's last expression, so it is evaluated but not displayed

# Write the combined table as a Feather file.
feather.write_feather(
    feynman_results, os.path.normpath("../results_feynman/feynman_results.feather")
)