In [2]:
import pandas as pd
import os
import glob
import numpy as np

In [3]:
extension = 'csv'
result_files = glob.glob('*.{}'.format(extension))
print(result_files)
print(len(result_files))

['678_visualizing_environmental.csv', '687_sleuth_ex1605.csv', '659_sleuth_ex1714.csv', '561_cpu.csv', '529_pollen.csv', 'alpinegp-blackbox_results.csv', '503_wind.csv', '1029_LEV.csv', '522_pm10.csv', '542_pollution.csv', '1027_ESL.csv', '1028_SWD.csv', '695_chatfield_4.csv', '225_puma8NH.csv', '227_cpu_small.csv', '229_pwLinear.csv', '712_chscase_geyser1.csv', '547_no2.csv', '1096_FacultySalaries.csv', '666_rmftsa_ladata.csv', '192_vineyard.csv', '519_vinnie.csv', '527_analcatdata_election2000.csv', '706_sleuth_case1202.csv', '523_analcatdata_neavote.csv', '560_bodyfat.csv', '1030_ERA.csv', '485_analcatdata_vehicle.csv', '505_tecator.csv', '556_analcatdata_apnea2.csv', '690_visualizing_galaxy.csv', '663_rabe_266.csv', '557_analcatdata_apnea1.csv', '197_cpu_act.csv', '665_sleuth_case2002.csv', '210_cloud.csv', '1089_USCrime.csv', '230_machine_cpu.csv', '228_elusage.csv']
39


In [4]:
r2_tests= []
aggregated_results = pd.DataFrame()
# combine all the results files into one dataset
for file in result_files: 
    # skip aggregate results file
    if "_results" in file:
        continue
    # skip Friedman datasets
    if "fri" in file:
        continue
    data = pd.read_csv(file, sep=";", header=0)
    aggregated_results = pd.concat([aggregated_results, data])
    r2_tests.append(data["r2_test"].to_numpy())

# add algorithm name
aggregated_results["algorithm"] = "AlpineGP"

In [5]:
print(aggregated_results[["algorithm", "problem", "r2_test"]])

   algorithm                        problem   r2_test
0   AlpineGP  678_visualizing_environmental -0.269128
1   AlpineGP  678_visualizing_environmental  0.297113
2   AlpineGP  678_visualizing_environmental  0.230136
3   AlpineGP  678_visualizing_environmental  0.034309
4   AlpineGP  678_visualizing_environmental  0.338141
..       ...                            ...       ...
5   AlpineGP                    228_elusage  0.716071
6   AlpineGP                    228_elusage  0.797977
7   AlpineGP                    228_elusage  0.754009
8   AlpineGP                    228_elusage  0.784629
9   AlpineGP                    228_elusage  0.201719

[380 rows x 3 columns]


In [6]:
aggregated_results = aggregated_results.rename(columns={"r2_test": "r2_zero_test", "problem": "dataset"})

aggregated_results.to_csv("alpinegp-blackbox_results.csv", index=False)

In [7]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset").agg({"r2_train": "median", "r2_zero_test": "median"}).reset_index()

algorithm_stats["r2_difference"] = algorithm_stats["r2_train"] - algorithm_stats["r2_zero_test"]

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="r2_difference", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset  r2_train  r2_zero_test  r2_difference
0               687_sleuth_ex1605  0.859387      0.151750       0.707638
1                    192_vineyard  0.876894      0.213078       0.663815
2         485_analcatdata_vehicle  0.948263      0.304339       0.643924
3                   542_pollution  0.871968      0.252399       0.619569
4   678_visualizing_environmental  0.572854      0.111752       0.461102
5               659_sleuth_ex1714  0.961291      0.610788       0.350504
6             665_sleuth_case2002  0.567391      0.216995       0.350396
7             706_sleuth_case1202  0.873983      0.572711       0.301272
8                    1089_USCrime  0.966473      0.739885       0.226588
9                 230_machine_cpu  0.956248      0.759371       0.196877
10                    228_elusage  0.904176      0.721685       0.182490
11                        547_no2  0.604419      0.446190       0.158229
12                       522_pm10  0.382137      0.

In [8]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset")["r2_zero_test"].agg(['mean', 'median', 'std']).reset_index()

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="median", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset      mean    median       std
0                    663_rabe_266  0.994973  0.994792  0.001387
1    527_analcatdata_election2000  0.992298  0.992383  0.006152
2                     505_tecator  0.985639  0.987823  0.006008
3                         561_cpu  0.976692  0.981073  0.015743
4          690_visualizing_galaxy  0.963990  0.966614  0.007913
5                     560_bodyfat  0.595445  0.964600  0.678973
6                   227_cpu_small  0.949931  0.953020  0.010269
7                     197_cpu_act  0.939741  0.948657  0.029758
8         523_analcatdata_neavote  0.936577  0.943564  0.027836
9          556_analcatdata_apnea2  0.863564  0.867927  0.021875
10                   229_pwLinear  0.856554  0.864017  0.029523
11                       1027_ESL  0.859966  0.859878  0.021908
12         557_analcatdata_apnea1  0.860032  0.856020  0.045725
13                      210_cloud  0.785513  0.855809  0.174546
14                695_chatfield_4  0.857

In [9]:
r2_tests = np.concatenate(r2_tests).ravel()
# print(r2_tests)

In [10]:
print("Mean test R2 = ", r2_tests.mean().item())

Mean test R2 =  0.6167499427856007


In [11]:
r2_tests.std().item()

0.5219995371567658

In [12]:
print("Median test R2 = ", np.median(r2_tests).item())

Median test R2 =  0.7539631702366081


In [13]:
# Convert the DataFrame to Markdown
markdown_table = algorithm_stats.to_markdown(index=False)

# Print the Markdown table
print(markdown_table)

# Save the Markdown table to a file
with open('table.md', 'w') as file:
    file.write(markdown_table)


| dataset                       |        mean |   median |        std |
|:------------------------------|------------:|---------:|-----------:|
| 663_rabe_266                  |  0.994973   | 0.994792 | 0.00138667 |
| 527_analcatdata_election2000  |  0.992298   | 0.992383 | 0.00615176 |
| 505_tecator                   |  0.985639   | 0.987823 | 0.00600829 |
| 561_cpu                       |  0.976692   | 0.981073 | 0.0157433  |
| 690_visualizing_galaxy        |  0.96399    | 0.966614 | 0.00791278 |
| 560_bodyfat                   |  0.595445   | 0.9646   | 0.678973   |
| 227_cpu_small                 |  0.949931   | 0.95302  | 0.0102687  |
| 197_cpu_act                   |  0.939741   | 0.948657 | 0.0297579  |
| 523_analcatdata_neavote       |  0.936577   | 0.943564 | 0.0278365  |
| 556_analcatdata_apnea2        |  0.863564   | 0.867927 | 0.0218752  |
| 229_pwLinear                  |  0.856554   | 0.864017 | 0.0295234  |
| 1027_ESL                      |  0.859966   | 0.859878 | 0.021