In [42]:
import os

import attrs
import numpy as np
import pandas as pd

In [35]:
@attrs.define
class RunReader:
    """Summary of one run directory: model name plus score mean and std.

    Only ``path`` is passed to the constructor. The other three fields are
    read from files below ``path`` right after construction:

    * ``metrics/score_mean`` -> ``score_mean``
    * ``metrics/score_std``  -> ``score_std``
    * ``tags/model``         -> ``name``

    Raises:
        FileNotFoundError: if any of the three files does not exist.
        IndexError: if a metric file holds fewer than two fields.
        ValueError: if the second field of a metric file is not a float.
    """

    path: str = attrs.field()
    score_mean: float = attrs.field(init=False)
    score_std: float = attrs.field(init=False)
    name: str = attrs.field(init=False)

    def __attrs_post_init__(self):
        self.score_mean = self._read_metric("score_mean")
        self.score_std = self._read_metric("score_std")
        # Strip surrounding whitespace so a trailing newline in the tag file
        # cannot produce a second, distinct key when grouping by ``name``.
        self.name = self._read_file("tags", "model").strip()

    def _read_file(self, *parts):
        """Return the full text of the file at ``path`` joined with ``parts``."""
        with open(os.path.join(self.path, *parts), "r") as f:
            return f.read()

    def _read_metric(self, metric):
        """Return the second whitespace-separated field of a metric file.

        Splitting on any whitespace (instead of a single space) keeps the
        parse working when fields are separated by repeated spaces or when
        the file holds several lines.
        NOTE(review): presumably each line is ``timestamp value [step]`` and
        the first line is the value of interest -- confirm against the
        tracking store that wrote these files.
        """
        return float(self._read_file("metrics", metric).split()[1])

    def dict(self):
        """Return the run as a plain record: name, score_mean, score_std."""
        return {
            "name": self.name,
            "score_mean": self.score_mean,
            "score_std": self.score_std,
        }

In [37]:
# Read a single run directory and display its summary record.
r = RunReader(path="mlruns/100940072706001416/3af9add646884a6b9ed3d690a6bed09a")
r.dict()

{'name': 'SparseAdditiveBoostingRegressor',
 'score_mean': 0.2199131782651799,
 'score_std': 0.05131371566158607}

In [89]:
@attrs.define
class ExperimentReader:
    """Collect every run found in one experiment directory.

    ``path`` is the experiment directory. Each sub-directory of it is loaded
    as a ``RunReader`` and stored in ``runs``.

    Raises:
        FileNotFoundError: propagated from ``RunReader`` when a
            sub-directory lacks one of the files it reads.
    """

    path: str = attrs.field()
    runs: list[RunReader] = attrs.field(init=False)

    def __attrs_post_init__(self):
        # Keep directories only: a stray regular file next to the runs would
        # otherwise be handed to RunReader and abort the whole experiment.
        # Sorted because os.listdir returns entries in arbitrary order.
        run_dirs = sorted(
            entry
            for entry in os.listdir(self.path)
            if entry != "meta.yaml"
            and os.path.isdir(os.path.join(self.path, entry))
        )
        self.runs = [
            RunReader(os.path.join(self.path, entry)) for entry in run_dirs
        ]

    def table(self):
        """Return a DataFrame with one row per run (name, score_mean, score_std)."""
        return pd.DataFrame([run.dict() for run in self.runs])

    def mean_ranking(self, rank=False):
        """Return the highest ``score_mean`` of each model name.

        Args:
            rank: when True, return ranks instead of scores, with rank 1
                given to the model with the highest score. Defaults to
                False, which returns the raw per-model maxima.

        Returns:
            A Series indexed by ``name``.
        """
        best = self.table().groupby("name").score_mean.max()
        return best.rank(ascending=False) if rank else best

    def std_ranking(self, rank=False):
        """Return the lowest ``score_std`` of each model name.

        Args:
            rank: when True, return ranks instead of values, with rank 1
                given to the model with the lowest std. Defaults to False,
                which returns the raw per-model minima.

        Returns:
            A Series indexed by ``name``.
        """
        lowest = self.table().groupby("name").score_std.min()
        return lowest.rank(ascending=True) if rank else lowest

In [90]:
# Empty frame with one column per model name; no rows are added in this cell.
model_columns = [
    "DecisionTreeRegressor",
    "RandomForestRegressor",
    "ExplainableBoostingRegressor",
    "SparseAdditiveBoostingRegressor",
    "XGBRegressor",
    "randomforestregressor",
    "ridgecv",
]
df = pd.DataFrame(columns=model_columns)
df

Unnamed: 0,DecisionTreeRegressor,RandomForestRegressor,ExplainableBoostingRegressor,SparseAdditiveBoostingRegressor,XGBRegressor,randomforestregressor,ridgecv


In [92]:
# Build one row per experiment holding the best mean score of each model.
# The listing is sorted so the positional slice is deterministic; os.listdir
# returns entries in arbitrary order.
# NOTE(review): the [2:-1] slice drops the first two entries and the last
# one -- presumably non-experiment entries; confirm against the actual
# contents of mlruns/.
dirs = sorted(os.listdir("mlruns"))[2:-1]
df_list = []
exceptions = []
for experiment_dir in dirs:  # not `dir`, which would shadow the builtin
    try:
        experiment = ExperimentReader(os.path.join("mlruns", experiment_dir))
        mean_ranking = experiment.mean_ranking()
        df_list.append(mean_ranking)
    except FileNotFoundError as e:
        # A run with a missing file makes the reader raise; record the
        # missing path and leave this experiment out of the result.
        exceptions.append(e.filename)
merged_df = pd.concat(df_list, axis=1).T.reset_index(drop=True)
merged_df

name,DecisionTreeRegressor,ExplainableBoostingRegressor,SparseAdditiveBoostingRegressor,XGBRegressor,randomforestregressor,ridgecv
0,0.492598,0.580912,0.293820,0.621120,0.618507,0.063680
1,0.438221,0.365574,0.053777,0.647405,0.603019,0.036903
2,0.484158,0.504628,0.404053,0.703878,0.644872,0.095868
3,0.348067,0.389941,-0.029653,0.361401,0.470077,0.389633
4,0.422491,0.334714,0.053209,0.613659,0.571743,0.071548
...,...,...,...,...,...,...
103,0.435461,0.616364,0.521694,0.558074,0.593470,0.627920
104,0.484984,0.646965,0.282799,0.629755,0.615132,0.132180
105,0.544795,0.729106,0.361839,0.721147,0.690359,0.131432
106,0.545170,0.709915,0.108542,0.727677,0.695060,0.067148


In [96]:
# Column-wise maximum: the best score each model reached in any experiment.
merged_df.max(axis="index")

name
DecisionTreeRegressor              0.989932
ExplainableBoostingRegressor       0.958052
SparseAdditiveBoostingRegressor    0.881272
XGBRegressor                       0.988259
randomforestregressor              0.990098
ridgecv                            0.921774
dtype: float64

In [82]:
# Inspect the first per-experiment result collected by the experiment loop.
# NOTE(review): mean_ranking() as defined in this notebook returns raw
# per-model maxima, so the rank values recorded as this cell's output appear
# to come from an earlier execution -- re-run before relying on them.
df_list[0]

name
DecisionTreeRegressor              4.0
ExplainableBoostingRegressor       3.0
SparseAdditiveBoostingRegressor    5.0
XGBRegressor                       1.0
randomforestregressor              2.0
ridgecv                            6.0
Name: score_mean, dtype: float64

In [79]:
# NOTE(review): same expression as the previous cell. The empty frame
# recorded as this cell's output does not match what the experiment loop
# appends to df_list, so it appears to come from an earlier execution --
# candidate for removal.
df_list[0]

Unnamed: 0,DecisionTreeRegressor,RandomForestRegressor,ExplainableBoostingRegressor,SparseAdditiveBoostingRegressor,XGBRegressor,randomforestregressor,ridgecv


In [68]:
# Paths of the missing files (FileNotFoundError.filename) recorded by the
# experiment loop; each entry corresponds to one experiment that was skipped.
exceptions

['mlruns\\193398226061548739\\67bf8ed517364892887771fe434e92e6\\metrics/score_mean',
 'mlruns\\277116109718977322\\7b8a01b0a1f849e898aac62fdb0f4b50\\metrics/score_mean',
 'mlruns\\294147762646535044\\a85e2f7bd9954c9798981949eb136d8e\\metrics/score_mean',
 'mlruns\\340200570096404728\\5a0225d32d8a46bca7b287e1ab60a83c\\metrics/score_mean',
 'mlruns\\363236222016773736\\f23df4fd999e45699cfa4173a5c1037a\\metrics/score_mean',
 'mlruns\\376411700217649586\\294b02db1f2a42d08797e30a57183be4\\metrics/score_mean',
 'mlruns\\523010114342430557\\befe05eba6084ceb8a9b4ad1d4f76598\\metrics/score_mean',
 'mlruns\\545860784819498741\\a421d7d462a34258a87e26e88f7f6c58\\metrics/score_mean',
 'mlruns\\549225710293164941\\7ea5fc2b44fe4faf8686ebf9e5f43d5c\\metrics/score_mean',
 'mlruns\\584153370672643316\\0c58d7f8147b491f818c24daa76bb449\\metrics/score_mean',
 'mlruns\\641751541645141695\\c9fb8f6d672a4df49cfcd5d6ef4a589d\\metrics/score_mean',
 'mlruns\\789699715279712378\\bee65e5be93a4a309252d69d4bb3b67c\\m

In [55]:
# Load one experiment and show its per-model mean-score summary.
exp = ExperimentReader(path="mlruns/100940072706001416")
exp.mean_ranking()

name
DecisionTreeRegressor              4.0
ExplainableBoostingRegressor       3.0
SparseAdditiveBoostingRegressor    5.0
XGBRegressor                       1.0
randomforestregressor              2.0
ridgecv                            6.0
Name: score_mean, dtype: float64

In [47]:
# Rank models by their best mean score (rank 1 = highest score).
# Reads the experiment table instead of the global `df`: the only `df`
# assigned in this notebook is an empty frame without "name" or "score_mean"
# columns, so the original expression raises KeyError on a fresh Run All.
exp.table().groupby("name").score_mean.max().rank(ascending=False)

name
DecisionTreeRegressor              4.0
ExplainableBoostingRegressor       3.0
SparseAdditiveBoostingRegressor    5.0
XGBRegressor                       1.0
randomforestregressor              2.0
ridgecv                            6.0
Name: score_mean, dtype: float64

In [41]:
# Rank individual rows by score_mean, with rank 1 given to the highest score.
# NOTE(review): the only `df` assigned in the visible cells is an empty frame
# whose columns are model names, with no "score_mean" column, so this cell
# presumably relied on a `df` from an earlier cell that is no longer present
# -- confirm the intended data source before re-running.
df["score_mean"].rank(ascending=False)

4     1.0
1     2.0
5     3.0
7     4.0
3     5.0
6     6.0
2     7.0
8     8.0
0     NaN
9     NaN
10    NaN
Name: score_mean, dtype: float64