In [None]:
import os
import json
import pandas as pd
import pyarrow.feather as feather
from pathlib import Path

In [None]:
def json_to_dataframe(path):
    """Collect every ``*.json`` file found under ``path`` into one DataFrame.

    The directory tree rooted at ``path`` is walked recursively and each JSON
    file that parses successfully contributes one record (row). When a record
    is a dict whose ``"symbolic_model"`` entry is a list, that list is
    collapsed into a single string of the form ``"B0*<term0>+B1*<term1>+..."``.

    Files that cannot be decoded or parsed are skipped; their paths are
    printed once the walk has finished.

    Directories and files are visited in sorted order so the row order of the
    result does not depend on the order in which the filesystem lists entries.

    Parameters
    ----------
    path : str or os.PathLike
        Root directory to search.

    Returns
    -------
    pandas.DataFrame
        One row per parsed file. A ``"dataset"`` column, if present, is
        renamed to ``"dataset_name"``. The frame is empty when no JSON file
        was found.
    """
    records = []
    failed_files = []

    for root, dirs, files in os.walk(path):
        # Sorting `dirs` in place controls the order of os.walk's top-down
        # descent, making the traversal deterministic.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".json"):
                continue

            file_path = os.path.join(root, file)
            try:
                # Read as UTF-8 explicitly instead of relying on the
                # platform-default text encoding.
                with open(file_path, "r", encoding="utf-8") as json_file:
                    json_data = json.load(json_file)
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Best effort: remember the file and keep going.
                failed_files.append(file_path)
                continue

            if isinstance(json_data, dict) and isinstance(
                json_data.get("symbolic_model"), list
            ):
                # Prefix each term with its coefficient name B<i> and join
                # the terms with "+". Non-string terms are formatted via str().
                json_data["symbolic_model"] = "+".join(
                    f"B{i}*{term}"
                    for i, term in enumerate(json_data["symbolic_model"])
                )

            records.append(json_data)

    if failed_files:
        print("\nFailed files:")
        for f in failed_files:
            print(f)

    # Build the frame in one go from the collected records.
    return pd.DataFrame(records).rename(columns={"dataset": "dataset_name"})

In [None]:
# Current working directory of the kernel, resolved to an absolute path
# (assumed to be the directory that holds this notebook — confirm).
notebook_dir = Path().resolve()

# Root directory of the raw JSON results. The default is the original local
# location; set the SRBENCH_RESULTS_DIR environment variable to read from
# another location (for example notebook_dir.parent / "results") without
# editing the notebook.
in_root_path = os.environ.get(
    "SRBENCH_RESULTS_DIR",
    "/home/mattsheng/Dropbox/srbench/srbench-2.1/results_feynman",
)

# Root directory under which the converted Feather files are written.
out_root_path = notebook_dir.parent / "results/no_idx"

### BART permutation

In [None]:
# Convert the BART_perm JSON results into one Feather file per sample size n.
bart_perm_out_dir = Path(out_root_path) / "BART_perm"
# Create the output directory up front so the write below does not fail when
# the directory is absent (e.g. on a fresh checkout).
bart_perm_out_dir.mkdir(parents=True, exist_ok=True)

for n in [500, 1000, 1500, 2000]:
    feynman_BART_perm = json_to_dataframe(
        os.path.join(in_root_path, f"BART_perm/n_{n}/")
    )
    # In the index columns, a value equal to ["NA"] is replaced by an empty
    # list (presumably the producer's missing-value marker — confirm).
    for col in ["idx_local", "idx_gse", "idx_gmax"]:
        feynman_BART_perm[col] = feynman_BART_perm[col].apply(
            lambda x: [] if x == ["NA"] else x
        )
    feather.write_feather(
        feynman_BART_perm,
        os.path.join(bart_perm_out_dir, f"feynman_BART_perm_n{n}_preidx.feather"),
    )

### BART MI

In [None]:
# Convert the BART_MI JSON results into one Feather file per sample size n.
bart_mi_out_dir = Path(out_root_path) / "BART_MI"
# Create the output directory up front so the write below does not fail when
# the directory is absent (e.g. on a fresh checkout).
bart_mi_out_dir.mkdir(parents=True, exist_ok=True)

for n in [500, 1000, 1500, 2000]:
    feynman_BART_MI = json_to_dataframe(os.path.join(in_root_path, f"BART_MI/n_{n}/"))
    # In these columns, a value equal to ["NA"] is replaced by an empty list
    # (presumably the producer's missing-value marker — confirm).
    for col in ["idx_vip", "idx_mi", "mi"]:
        feynman_BART_MI[col] = feynman_BART_MI[col].apply(
            lambda x: [] if x == ["NA"] else x
        )
    feather.write_feather(
        feynman_BART_MI,
        os.path.join(bart_mi_out_dir, f"feynman_BART_MI_n{n}_preidx.feather"),
    )

### BART VIP Rank & VC-measure

In [None]:
# Convert the BART_VIP_Rank JSON results into one Feather file per sample
# size n.
bart_vip_rank_out_dir = Path(out_root_path) / "BART_VIP_Rank"
# Create the output directory up front so the write below does not fail when
# the directory is absent (e.g. on a fresh checkout).
bart_vip_rank_out_dir.mkdir(parents=True, exist_ok=True)

for n in [500, 1000, 1500, 2000]:
    feynman_BART_cluster = json_to_dataframe(
        os.path.join(in_root_path, f"BART_VIP_Rank/n_{n}/")
    )
    feather.write_feather(
        feynman_BART_cluster,
        os.path.join(
            bart_vip_rank_out_dir, f"feynman_BART_VIP_Rank_n{n}_preidx.feather"
        ),
    )

### DART and DART VC-measure

In [None]:
# Convert the DART_VC-measure JSON results into one Feather file per sample
# size n.
dart_vc_out_dir = Path(out_root_path) / "DART_VC-measure"
# Create the output directory up front so the write below does not fail when
# the directory is absent (e.g. on a fresh checkout).
dart_vc_out_dir.mkdir(parents=True, exist_ok=True)

for n in [500, 1000, 1500, 2000]:
    feynman_DART_cluster = json_to_dataframe(
        os.path.join(in_root_path, f"DART_VC-measure/n_{n}/")
    )
    feather.write_feather(
        feynman_DART_cluster,
        os.path.join(
            dart_vc_out_dir,
            f"feynman_DART_VC-measure_n{n}_preidx.feather",
        ),
    )

### ABC Bayesian forests

In [None]:
# Convert the ABC_Bayesian_forests JSON results into one Feather file per
# sample size n.
abc_out_dir = Path(out_root_path) / "ABC_Bayesian_forests"
# Create the output directory up front so the write below does not fail when
# the directory is absent (e.g. on a fresh checkout).
abc_out_dir.mkdir(parents=True, exist_ok=True)

for n in [500, 1000, 1500, 2000]:
    feynman_ABC = json_to_dataframe(
        os.path.join(in_root_path, f"ABC_Bayesian_forests/n_{n}/")
    )
    feather.write_feather(
        feynman_ABC,
        os.path.join(
            abc_out_dir,
            f"feynman_ABC_Bayesian_forests_n{n}_preidx.feather",
        ),
    )