In [13]:
import pandas as pd
import os
import glob
import numpy as np

In [14]:
extension = 'csv'
result_files = glob.glob('*.{}'.format(extension))
print(result_files)
print(len(result_files))

['678_visualizing_environmental.csv', '706_sleuth_case1202.csv', '695_chatfield_4.csv', '522_pm10.csv', '547_no2.csv', '229_pwLinear.csv', '666_rmftsa_ladata.csv', '227_cpu_small.csv', 'alpinegp-blackbox_results.csv', '542_pollution.csv', '210_cloud.csv', '523_analcatdata_neavote.csv', '687_sleuth_ex1605.csv', '712_chscase_geyser1.csv', '505_tecator.csv', '1096_FacultySalaries.csv', '1030_ERA.csv', '690_visualizing_galaxy.csv', '225_puma8NH.csv', '1028_SWD.csv', '228_elusage.csv', '529_pollen.csv', '557_analcatdata_apnea1.csv', '560_bodyfat.csv', '485_analcatdata_vehicle.csv', '659_sleuth_ex1714.csv', '527_analcatdata_election2000.csv', '1029_LEV.csv', '503_wind.csv', '556_analcatdata_apnea2.csv', '192_vineyard.csv', '663_rabe_266.csv', '665_sleuth_case2002.csv', '1027_ESL.csv', '230_machine_cpu.csv', '1089_USCrime.csv', '561_cpu.csv', '519_vinnie.csv', '197_cpu_act.csv']
39


In [15]:
r2_tests= []
aggregated_results = pd.DataFrame()
# combine all the results files into one dataset
for file in result_files: 
    # skip aggregate results file
    if "_results" in file:
        continue
    # skip Friedman datasets
    if "fri" in file:
        continue
    data = pd.read_csv(file, sep=";", header=0)
    aggregated_results = pd.concat([aggregated_results, data])
    r2_tests.append(data["r2_test"].to_numpy())

# add algorithm name
aggregated_results["algorithm"] = "AlpineGP"

In [16]:
print(aggregated_results[["algorithm", "problem", "r2_test"]])

   algorithm                        problem   r2_test
0   AlpineGP  678_visualizing_environmental -0.489818
1   AlpineGP  678_visualizing_environmental  0.379238
2   AlpineGP  678_visualizing_environmental  0.055770
3   AlpineGP  678_visualizing_environmental  0.060785
4   AlpineGP  678_visualizing_environmental  0.100872
..       ...                            ...       ...
5   AlpineGP                    197_cpu_act  0.950814
6   AlpineGP                    197_cpu_act  0.937063
7   AlpineGP                    197_cpu_act  0.942992
8   AlpineGP                    197_cpu_act  0.948173
9   AlpineGP                    197_cpu_act  0.946803

[380 rows x 3 columns]


In [17]:
aggregated_results = aggregated_results.rename(columns={"r2_test": "r2_zero_test", "problem": "dataset"})

aggregated_results.to_csv("alpinegp-blackbox_results.csv", index=False)

In [18]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset").agg({"r2_train": "median", "r2_zero_test": "median"}).reset_index()

algorithm_stats["r2_difference"] = algorithm_stats["r2_train"] - algorithm_stats["r2_zero_test"]

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="r2_difference", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset  r2_train  r2_zero_test  r2_difference
0         485_analcatdata_vehicle  0.944626      0.215192       0.729434
1                   542_pollution  0.880066      0.200246       0.679820
2   678_visualizing_environmental  0.576639      0.058277       0.518362
3               687_sleuth_ex1605  0.850778      0.377402       0.473376
4                    192_vineyard  0.883099      0.454081       0.429018
5             665_sleuth_case2002  0.566343      0.230963       0.335380
6             706_sleuth_case1202  0.885401      0.560718       0.324683
7               659_sleuth_ex1714  0.962372      0.642989       0.319384
8                    1089_USCrime  0.960333      0.644552       0.315781
9                     228_elusage  0.901635      0.720556       0.181078
10                       522_pm10  0.388041      0.243645       0.144395
11                        547_no2  0.604492      0.487368       0.117124
12                      210_cloud  0.943018      0.

In [19]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset")["r2_zero_test"].agg(['mean', 'median', 'std']).reset_index()

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="median", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset      mean    median       std
0                    663_rabe_266  0.994866  0.995115  0.001215
1    527_analcatdata_election2000  0.993740  0.994812  0.005867
2                     560_bodyfat  0.778153  0.992855  0.455195
3                     505_tecator  0.987081  0.988520  0.006215
4                         561_cpu  0.962866  0.974705  0.039293
5          690_visualizing_galaxy  0.964883  0.964986  0.007202
6                     197_cpu_act  0.947273  0.947488  0.004859
7                   227_cpu_small  0.947611  0.947114  0.006540
8         523_analcatdata_neavote  0.936577  0.943564  0.027836
9            1096_FacultySalaries  0.667598  0.893905  0.532742
10                230_machine_cpu  0.816217  0.876052  0.157381
11                       1027_ESL  0.861513  0.867640  0.022269
12         556_analcatdata_apnea2  0.856692  0.862532  0.042140
13                   229_pwLinear  0.862626  0.860976  0.022146
14         557_analcatdata_apnea1  0.852

In [20]:
r2_tests = np.concatenate(r2_tests).ravel()
# print(r2_tests)

In [21]:
print("Mean test R2 = ", r2_tests.mean().item())

Mean test R2 =  0.6445996617242278


In [22]:
r2_tests.std().item()

0.36576498231448856

In [23]:
print("Median test R2 = ", np.median(r2_tests).item())

Median test R2 =  0.7581699393900136


In [24]:
# Convert the DataFrame to Markdown
markdown_table = algorithm_stats.to_markdown(index=False)

# Print the Markdown table
print(markdown_table)

# Save the Markdown table to a file
with open('table.md', 'w') as file:
    file.write(markdown_table)


| dataset                       |      mean |    median |        std |
|:------------------------------|----------:|----------:|-----------:|
| 663_rabe_266                  | 0.994866  | 0.995115  | 0.00121517 |
| 527_analcatdata_election2000  | 0.99374   | 0.994812  | 0.00586654 |
| 560_bodyfat                   | 0.778153  | 0.992855  | 0.455195   |
| 505_tecator                   | 0.987081  | 0.98852   | 0.00621484 |
| 561_cpu                       | 0.962866  | 0.974705  | 0.0392929  |
| 690_visualizing_galaxy        | 0.964883  | 0.964986  | 0.00720163 |
| 197_cpu_act                   | 0.947273  | 0.947488  | 0.00485852 |
| 227_cpu_small                 | 0.947611  | 0.947114  | 0.00653997 |
| 523_analcatdata_neavote       | 0.936577  | 0.943564  | 0.0278365  |
| 1096_FacultySalaries          | 0.667598  | 0.893905  | 0.532742   |
| 230_machine_cpu               | 0.816217  | 0.876052  | 0.157381   |
| 1027_ESL                      | 0.861513  | 0.86764   | 0.0222687  |
| 556_