In [331]:
import pandas as pd
import os
import glob
import numpy as np

In [332]:
extension = 'csv'
result_files = glob.glob('*.{}'.format(extension))
print(result_files)
print(len(result_files))

['678_visualizing_environmental.csv', '687_sleuth_ex1605.csv', '659_sleuth_ex1714.csv', '561_cpu.csv', 'alpinegp-blackbox_results.csv', '1029_LEV.csv', '522_pm10.csv', '542_pollution.csv', '1027_ESL.csv', '695_chatfield_4.csv', '229_pwLinear.csv', '712_chscase_geyser1.csv', '547_no2.csv', '1096_FacultySalaries.csv', '666_rmftsa_ladata.csv', '192_vineyard.csv', '519_vinnie.csv', '527_analcatdata_election2000.csv', '195_auto_price.csv', '706_sleuth_case1202.csv', '523_analcatdata_neavote.csv', '560_bodyfat.csv', '485_analcatdata_vehicle.csv', '556_analcatdata_apnea2.csv', '690_visualizing_galaxy.csv', '663_rabe_266.csv', '557_analcatdata_apnea1.csv', '665_sleuth_case2002.csv', '210_cloud.csv', '1089_USCrime.csv', '230_machine_cpu.csv', '228_elusage.csv']
32


In [333]:
r2_tests= []
aggregated_results = pd.DataFrame()
# combine all the results files into one dataset
for file in result_files: 
    # skip aggregate results file
    if "_results" in file:
        continue
    # skip Friedman datasets
    if "fri" in file:
        continue
    data = pd.read_csv(file, sep=";", header=0)
    aggregated_results = pd.concat([aggregated_results, data])
    r2_tests.append(data["r2_test"].to_numpy())

# add algorithm name
aggregated_results["algorithm"] = "AlpineGP"

  aggregated_results = pd.concat([aggregated_results, data])


In [334]:
print(aggregated_results[["algorithm", "problem", "r2_test"]])

   algorithm                        problem   r2_test
0   AlpineGP  678_visualizing_environmental  0.204556
1   AlpineGP  678_visualizing_environmental  0.408901
2   AlpineGP  678_visualizing_environmental  0.226601
3   AlpineGP  678_visualizing_environmental  0.227555
4   AlpineGP  678_visualizing_environmental -0.109123
..       ...                            ...       ...
5   AlpineGP                    228_elusage  0.701418
6   AlpineGP                    228_elusage  0.825378
7   AlpineGP                    228_elusage  0.762651
8   AlpineGP                    228_elusage  0.785289
9   AlpineGP                    228_elusage  0.527974

[300 rows x 3 columns]


In [335]:
aggregated_results = aggregated_results.rename(columns={"r2_test": "r2_zero_test", "problem": "dataset"})

aggregated_results.to_csv("alpinegp-blackbox_results.csv", index=False)

In [336]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset").agg({"r2_train": "median", "r2_zero_test": "median"}).reset_index()

algorithm_stats["r2_difference"] = algorithm_stats["r2_train"] - algorithm_stats["r2_zero_test"]

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="r2_difference", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset  r2_train  r2_zero_test  r2_difference
0         485_analcatdata_vehicle  0.804305      0.397549       0.406756
1                   542_pollution  0.679228      0.355834       0.323394
2                    192_vineyard  0.800718      0.477841       0.322876
3               659_sleuth_ex1714  0.899852      0.587538       0.312315
4               687_sleuth_ex1605  0.733993      0.433322       0.300671
5                    1089_USCrime  0.875139      0.600101       0.275039
6                       210_cloud  0.889730      0.731848       0.157882
7   678_visualizing_environmental  0.433137      0.278718       0.154419
8             706_sleuth_case1202  0.764207      0.631445       0.132763
9                     228_elusage  0.856486      0.763533       0.092953
10           1096_FacultySalaries  0.894071      0.809196       0.084875
11                       522_pm10  0.232981      0.149032       0.083949
12                230_machine_cpu  0.911400      0.

In [337]:
# Group by problem and calculate the mean, median, and standard deviation for r2_zero_test scores
algorithm_stats = aggregated_results.groupby("dataset")["r2_zero_test"].agg(['mean', 'median', 'std']).reset_index()

# Sort algorithms by median r2_zero_test score
algorithm_stats = algorithm_stats.sort_values(by="median", ascending=False).reset_index(drop=True)

print(algorithm_stats)

                          dataset      mean    median       std
0                     560_bodyfat  0.995082  0.997511  0.005727
1    527_analcatdata_election2000  0.989987  0.991528  0.005610
2                    663_rabe_266  0.986026  0.989354  0.011057
3          690_visualizing_galaxy  0.955560  0.955321  0.007762
4         523_analcatdata_neavote  0.936344  0.943564  0.028017
5                         561_cpu  0.868028  0.914378  0.122736
6                 695_chatfield_4  0.868838  0.869436  0.040004
7                        1027_ESL  0.858704  0.864982  0.022962
8          557_analcatdata_apnea1  0.842992  0.853373  0.053027
9                 230_machine_cpu  0.682516  0.828245  0.315859
10         556_analcatdata_apnea2  0.813277  0.815028  0.034472
11           1096_FacultySalaries  0.629100  0.809196  0.491015
12                    228_elusage  0.718989  0.763533  0.093993
13            712_chscase_geyser1  0.758240  0.756233  0.039447
14                     519_vinnie  0.735

In [338]:
r2_tests = np.concatenate(r2_tests).ravel()
# print(r2_tests)

In [339]:
print("Mean test R2 = ", r2_tests.mean().item())

Mean test R2 =  0.6379516825728591


In [340]:
r2_tests.std().item()

0.33572567067609765

In [341]:
print("Median test R2 = ", np.median(r2_tests).item())

Median test R2 =  0.7097814872317425
