In [7]:
import os
import json
import pandas as pd
import pyarrow.feather as feather

In [8]:
def json_to_dataframe(path):
    """Collect every ``*.json`` file found under ``path`` into one DataFrame.

    The directory tree rooted at ``path`` is walked recursively. Each JSON
    file contributes one record (one row). When a record has a
    ``"symbolic_model"`` entry that is a list of strings, the list is
    collapsed into a single string of the form ``"B0*<t0>+B1*<t1>+..."``.

    A file that cannot be parsed or post-processed is reported on stdout and
    skipped; the remaining files are still loaded.

    Parameters
    ----------
    path : str or path-like
        Root directory to search.

    Returns
    -------
    pandas.DataFrame
        One row per successfully loaded JSON file, in sorted traversal order,
        with the ``"dataset"`` column (if present) renamed to
        ``"dataset_name"``. Empty when no JSON file could be loaded.
    """
    data_list = []

    for root, dirs, files in os.walk(path):
        # Sorting ``dirs`` in place steers os.walk's descent, so the row order
        # no longer depends on the filesystem's enumeration order.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".json"):
                continue

            file_path = os.path.join(root, file)

            with open(file_path, "r", encoding="utf-8") as json_file:
                try:
                    json_data = json.load(json_file)
                    if "symbolic_model" in json_data:
                        if isinstance(json_data["symbolic_model"], list):
                            # Prefix each term with its coefficient name
                            # (B0, B1, ...) and join the terms with "+".
                            sm = [
                                "B" + str(i) + "*" + ri
                                for i, ri in enumerate(json_data["symbolic_model"])
                            ]
                            json_data["symbolic_model"] = "+".join(sm)
                except Exception as e:
                    # Best effort: report the broken file and keep going
                    # instead of discarding everything collected so far.
                    print(f"Error reading {file_path}: {e}")
                    continue

            data_list.append(json_data)

    return pd.DataFrame(data_list).rename(columns={"dataset": "dataset_name"})

## BART Permutation

In [None]:
# Gather the results under results_blackbox/BART_perm and expose the
# "col_idx" column under the name "idx_gse".
pmlb_BART_perm = json_to_dataframe(
    os.path.normpath("../results_blackbox/BART_perm")
).rename(columns={"col_idx": "idx_gse"})

# Rows flagged as failed get an empty list in place of their idx_gse value.
pmlb_BART_perm["idx_gse"] = pmlb_BART_perm.apply(
    lambda run: run["idx_gse"] if not run["failed"] else [], axis=1
)

# SNR and n are set to the constant 0.0 for every row.
for constant_column in ("SNR", "n"):
    pmlb_BART_perm[constant_column] = 0.0

bart_perm_feather = os.path.normpath("../results_blackbox/pmlb_BART_perm.feather")
feather.write_feather(pmlb_BART_perm, bart_perm_feather)

## BART VIP Ranking

In [None]:
# Gather the results under results_blackbox/BART_VIP and store them,
# unmodified, as a single feather file.
bart_vip_dir = os.path.normpath("../results_blackbox/BART_VIP")
pmlb_BART_VIP = json_to_dataframe(bart_vip_dir)

bart_vip_feather = os.path.normpath(
    "../results_blackbox/pmlb_BART_VIP_withidx.feather"
)
feather.write_feather(pmlb_BART_VIP, bart_vip_feather)

## SR

In [None]:
# Gather the results stored under results_blackbox/SR.
sr_dir = os.path.normpath("../results_blackbox/SR")
pmlb_SR = json_to_dataframe(sr_dir)

# Derived columns: time_time divided by 3600 (presumably seconds -> hours;
# confirm the unit) and the test R^2 with negative values raised to 0.
pmlb_SR.loc[:, "training_time_hr"] = pmlb_SR["time_time"].div(3600)
pmlb_SR["r2_zero_test"] = pmlb_SR["r2_test"].apply(lambda score: max(score, 0))

# Tidy the algorithm labels. The substitutions run one after another in the
# order listed here, each over the whole column.
sr_label_substitutions = [
    ("Regressor", ""),
    ("regressor", ""),
    ("tuned.", ""),
    (".hclst_v2", ""),
    ("sembackpropgp", "SBP-GP"),
    ("FE_AFP", "AFP_FE"),
    ("GPGOMEA", "GP-GOMEA"),
]
for old_text, new_text in sr_label_substitutions:
    pmlb_SR["algorithm"] = pmlb_SR["algorithm"].apply(
        lambda label: label.replace(old_text, new_text)
    )

pmlb_SR

## PAN + SR

In [None]:
# Gather the results stored under results_blackbox/SR_BART_VIP.
sr_bart_dir = os.path.normpath("../results_blackbox/SR_BART_VIP")
pmlb_SR_BART = json_to_dataframe(sr_bart_dir)

# Derived columns: time_time divided by 3600 (presumably seconds -> hours;
# confirm the unit) and the test R^2 with negative values raised to 0.
pmlb_SR_BART.loc[:, "training_time_hr"] = pmlb_SR_BART["time_time"].div(3600)
pmlb_SR_BART["r2_zero_test"] = pmlb_SR_BART["r2_test"].apply(
    lambda score: max(score, 0)
)

# Tidy the algorithm labels. The substitutions run one after another in the
# order listed here, each over the whole column.
sr_bart_label_substitutions = [
    ("Regressor", ""),
    ("regressor", ""),
    ("tuned.", ""),
    (".hclst_v2", ""),
    ("sembackpropgp", "SBP-GP"),
    ("FE_AFP", "AFP_FE"),
    ("GPGOMEA", "GP-GOMEA"),
]
for old_text, new_text in sr_bart_label_substitutions:
    pmlb_SR_BART["algorithm"] = pmlb_SR_BART["algorithm"].apply(
        lambda label: label.replace(old_text, new_text)
    )

pmlb_SR_BART

In [11]:
# Stack the SR and SR_BART_VIP result frames (fresh 0..n-1 index) and persist
# the combined table as one feather file.
# The bare `pmlb_results` expression that used to follow the concat was
# removed: it sat in the middle of the cell, where it is evaluated and
# discarded rather than displayed.
pmlb_results = pd.concat([pmlb_SR, pmlb_SR_BART], ignore_index=True)

feather.write_feather(
    pmlb_results,
    os.path.normpath("../results_blackbox/pmlb_results.feather"),
)