In [79]:
import pandas as pd
import os
import glob
import numpy as np

In [80]:
# Collect every file in the current working directory that has the results extension.
extension = 'csv'
file_pattern = '*.{}'.format(extension)
result_files = glob.glob(file_pattern)

# Show the matched file names, then how many there are (one item per line).
print(result_files, len(result_files), sep='\n')

['678_visualizing_environmental.csv', '687_sleuth_ex1605.csv', '659_sleuth_ex1714.csv', '561_cpu.csv', '529_pollen.csv', 'alpinegp-blackbox_results.csv', '503_wind.csv', '1029_LEV.csv', '522_pm10.csv', '542_pollution.csv', '1027_ESL.csv', '1028_SWD.csv', '695_chatfield_4.csv', '225_puma8NH.csv', '227_cpu_small.csv', '229_pwLinear.csv', '712_chscase_geyser1.csv', '547_no2.csv', '1096_FacultySalaries.csv', '666_rmftsa_ladata.csv', '192_vineyard.csv', '519_vinnie.csv', '527_analcatdata_election2000.csv', '706_sleuth_case1202.csv', '523_analcatdata_neavote.csv', '560_bodyfat.csv', '1030_ERA.csv', '485_analcatdata_vehicle.csv', '505_tecator.csv', '556_analcatdata_apnea2.csv', '690_visualizing_galaxy.csv', '663_rabe_266.csv', '557_analcatdata_apnea1.csv', '197_cpu_act.csv', '665_sleuth_case2002.csv', '210_cloud.csv', '1089_USCrime.csv', '230_machine_cpu.csv', '228_elusage.csv']
39


In [81]:
# Combine all the per-dataset results files into one dataset.
# Each file is read once and kept in a list; a single pd.concat at the end avoids
# re-copying every previously accumulated row on each loop iteration.
r2_tests = []
loaded_frames = []
for file in result_files:
    # skip aggregate results file
    if "_results" in file:
        continue
    # skip Friedman datasets
    if "fri" in file:
        continue
    data = pd.read_csv(file, sep=";", header=0)
    loaded_frames.append(data)
    # keep the raw test scores of this file for the overall statistics computed later
    r2_tests.append(data["r2_test"].to_numpy())

# pd.concat raises on an empty list, so fall back to an empty frame when nothing was loaded.
# Row labels are left exactly as read from each file (no ignore_index).
aggregated_results = pd.concat(loaded_frames) if loaded_frames else pd.DataFrame()

# add algorithm name
aggregated_results["algorithm"] = "AlpineGP"

In [82]:
# Preview only the identifying columns and the test score of the combined table.
preview_columns = ["algorithm", "problem", "r2_test"]
print(aggregated_results.loc[:, preview_columns])

   algorithm                        problem   r2_test
0   AlpineGP  678_visualizing_environmental  0.243464
1   AlpineGP  678_visualizing_environmental  0.338494
2   AlpineGP  678_visualizing_environmental  0.353510
3   AlpineGP  678_visualizing_environmental  0.068854
4   AlpineGP  678_visualizing_environmental  0.214836
..       ...                            ...       ...
5   AlpineGP                    228_elusage  0.661387
6   AlpineGP                    228_elusage  0.825378
7   AlpineGP                    228_elusage  0.769816
8   AlpineGP                    228_elusage  0.779143
9   AlpineGP                    228_elusage  0.601095

[380 rows x 3 columns]


In [83]:
# Rename the test-score and dataset-name columns, then write the combined
# table to a CSV file without the row index.
column_renames = {"r2_test": "r2_zero_test", "problem": "dataset"}
aggregated_results = aggregated_results.rename(columns=column_renames)
aggregated_results.to_csv("alpinegp-blackbox_results.csv", index=False)

In [84]:
# Per dataset: median train R2, median test R2, and the gap between the two.
# Datasets are ordered from the largest train-minus-test gap to the smallest.
algorithm_stats = (
    aggregated_results
    .groupby("dataset")
    .agg(r2_train=("r2_train", "median"), r2_zero_test=("r2_zero_test", "median"))
    .reset_index()
    .assign(r2_difference=lambda medians: medians["r2_train"] - medians["r2_zero_test"])
    .sort_values(by="r2_difference", ascending=False)
    .reset_index(drop=True)
)

print(algorithm_stats)

                          dataset  r2_train  r2_zero_test  r2_difference
0                   542_pollution  0.684331      0.279925       0.404405
1         485_analcatdata_vehicle  0.802208      0.445686       0.356522
2               687_sleuth_ex1605  0.739826      0.440191       0.299635
3               659_sleuth_ex1714  0.895463      0.596779       0.298684
4                    192_vineyard  0.803101      0.607424       0.195676
5                    1089_USCrime  0.889593      0.714857       0.174735
6            1096_FacultySalaries  0.917317      0.747520       0.169797
7             706_sleuth_case1202  0.758698      0.607997       0.150701
8                       210_cloud  0.897807      0.773185       0.124622
9   678_visualizing_environmental  0.437419      0.319226       0.118193
10                    228_elusage  0.858144      0.764459       0.093685
11                       522_pm10  0.235845      0.150268       0.085577
12                       1028_SWD  0.373881      0.

In [85]:
# Per dataset: mean, median and standard deviation of the r2_zero_test scores,
# ordered from the highest median score to the lowest.
test_scores_by_dataset = aggregated_results.groupby("dataset")["r2_zero_test"]
algorithm_stats = (
    test_scores_by_dataset
    .agg(['mean', 'median', 'std'])
    .reset_index()
    .sort_values(by="median", ascending=False)
    .reset_index(drop=True)
)

print(algorithm_stats)

                          dataset      mean    median       std
0                     560_bodyfat  0.995081  0.997511  0.005728
1                    663_rabe_266  0.987042  0.988770  0.008794
2    527_analcatdata_election2000  0.987500  0.988501  0.004834
3                     505_tecator  0.982222  0.981874  0.004591
4          690_visualizing_galaxy  0.956153  0.955864  0.009107
5         523_analcatdata_neavote  0.936344  0.943564  0.028017
6                         561_cpu  0.898288  0.927481  0.102737
7                 230_machine_cpu  0.826555  0.885059  0.163715
8                 695_chatfield_4  0.873938  0.869568  0.033852
9                        1027_ESL  0.863080  0.864123  0.017643
10                  227_cpu_small  0.840726  0.834428  0.036993
11                    197_cpu_act  0.802687  0.804859  0.011216
12                     529_pollen  0.782721  0.778052  0.013291
13                      210_cloud  0.774567  0.773185  0.115628
14                    228_elusage  0.725

In [86]:
# Flatten the per-file score arrays into one 1-D array.
# np.atleast_1d makes the cell safe to re-run: once r2_tests is already a flat array,
# its elements are scalars, which np.concatenate would otherwise reject as
# zero-dimensional; wrapping each one yields the same flat array again.
r2_tests = np.concatenate([np.atleast_1d(scores) for scores in r2_tests]).ravel()

In [87]:
# Mean of the test R2 scores pooled over all loaded files.
mean_test_r2 = np.mean(r2_tests).item()
print("Mean test R2 = ", mean_test_r2)

Mean test R2 =  0.6488988182180486


In [88]:
# Standard deviation (NumPy default, ddof=0) of the pooled test R2 scores;
# left as the cell's last expression so the notebook displays it.
np.std(r2_tests).item()

0.309237904985923

In [89]:
# Median of the test R2 scores pooled over all loaded files.
median_test_r2 = np.median(r2_tests).item()
print("Median test R2 = ", median_test_r2)

Median test R2 =  0.7059318186290151
