In [1]:
import os

import attrs
import pandas as pd

In [2]:
@attrs.define
class RunReader:
    path: str = attrs.field()
    score_mean: float = attrs.field(init=False)
    score_std: float = attrs.field(init=False)
    name: str = attrs.field(init=False)

    def __attrs_post_init__(self):
        with open(
                os.path.join(self.path, "metrics/score_mean"), "r"
        ) as f:
            self.score_mean = float(f.read().split(" ")[1])
        with open(
                os.path.join(self.path, "metrics/score_std"), "r"
        ) as f:
            self.score_std = float(f.read().split(" ")[1])
        with open(
                os.path.join(self.path, "tags/model"), "r"
        ) as f:
            self.name = f.read()

    def dict(self):
        return {
            "name": self.name,
            "score_mean": self.score_mean,
            "score_std": self.score_std,
        }

In [3]:
r = RunReader(
    "mlruns/100940072706001416/3af9add646884a6b9ed3d690a6bed09a"
)
r.dict()

{'name': 'SparseAdditiveBoostingRegressor',
 'score_mean': 0.2199131782651799,
 'score_std': 0.05131371566158607}

In [33]:
@attrs.define
class ExperimentReader:
    path: str = attrs.field()
    runs: list[RunReader] = attrs.field(init=False)

    def __attrs_post_init__(self):
        dirs = [
            d for d in os.listdir(self.path) if d != "meta.yaml"
        ]
        self.runs = [
            RunReader(os.path.join(self.path, d)) for d in dirs
        ]

    def table(self):
        df = pd.DataFrame(
            [
                run.dict() for run in self.runs
            ]
        )
        return df

    def means(self):
        return self.table().groupby("name").score_mean.max() #.rank(ascending=False)

    def stds(self):
        return self.table().groupby("name").score_std.min() #.rank(ascending=True)

In [37]:
dirs = os.listdir("mlruns")[2:-1]
df_list = []
for dir_ in dirs:
    experiment = ExperimentReader(os.path.join("mlruns", dir_))
    mean_ranking = experiment.means()
    df_list.append(mean_ranking)
merged_df = pd.concat(df_list, axis=1).T.reset_index(drop=True)
merged_df

name,DecisionTreeRegressor,ExplainableBoostingRegressor,SparseAdditiveBoostingRegressor,XGBRegressor,randomforestregressor,ridgecv
0,0.492598,0.580912,0.293820,0.621120,0.618507,0.063680
1,0.438221,0.365574,0.053777,0.647405,0.603019,0.036903
2,0.484158,0.504628,0.404053,0.703878,0.644872,0.095868
3,0.348067,0.389941,-0.029653,0.361401,0.470077,0.389633
4,0.422491,0.334714,0.053209,0.613659,0.571743,0.071548
...,...,...,...,...,...,...
117,0.435461,0.616364,0.521694,0.558074,0.593470,0.627920
118,0.484984,0.646965,0.282799,0.629755,0.615132,0.132180
119,0.544795,0.729106,0.361839,0.721147,0.690359,0.131432
120,0.545170,0.709915,0.108542,0.727677,0.695060,0.067148


In [38]:
merged_df["SparseAdditiveBoostingRegressor"].sort_values()

108   -0.600380
90    -0.553327
55    -0.548953
93    -0.401378
105   -0.242656
         ...   
48     0.761979
70     0.881272
39          NaN
44          NaN
65          NaN
Name: SparseAdditiveBoostingRegressor, Length: 122, dtype: float64

In [39]:
threshold = 0.7
solved = merged_df.max(axis=1) > threshold
merged_df[solved].sort_values(by="SparseAdditiveBoostingRegressor")

name,DecisionTreeRegressor,ExplainableBoostingRegressor,SparseAdditiveBoostingRegressor,XGBRegressor,randomforestregressor,ridgecv
105,0.930462,0.662913,-0.242656,0.91641,0.941184,0.08098
79,0.749382,0.814054,-0.078339,0.827954,0.819547,0.37073
7,0.69809,0.778966,-0.00015,0.774709,0.08343,0.466857
33,0.519755,0.730648,0.036162,0.686917,0.65852,0.056989
54,0.517703,0.613331,0.053747,0.700708,0.663116,0.054118
49,0.499762,0.34156,0.073262,0.700811,0.652509,0.066231
120,0.54517,0.709915,0.108542,0.727677,0.69506,0.067148
23,0.989932,0.958052,0.115376,0.988259,0.990098,0.599386
110,0.706012,0.481989,0.141747,0.666947,0.700389,-0.097291
19,0.557997,0.735173,0.157652,0.729833,0.695308,0.082199


In [41]:
merged_df.index[solved]

Index([  2,   7,   8,  16,  18,  19,  22,  23,  26,  27,  33,  35,  37,  42,
        44,  48,  49,  50,  54,  63,  66,  70,  74,  77,  79,  84,  88,  95,
        96, 101, 102, 104, 105, 107, 110, 113, 119, 120],
      dtype='int64')