## Importy

In [1]:
import sys
# Prepend the notebook helper directory to the import search path.
# NOTE(review): this is an absolute, user-specific path, so the cell only works on a
# checkout located exactly here — consider deriving it from the notebook location
# or an environment variable.
sys.path.insert(0, "/home/krzysiek/Development/claim-modelling/claim_modelling_kedro/notebooks/utils")

from utils.my_kedro import MyKedro, suppress_all_loggers
import logging

# Notebook-level logger, limited to INFO and above.
logger = logging.getLogger(__name__)
logger.setLevel("INFO")

# Helper object used by the following cells (my_kedro.reload(), my_kedro.run(...)).
my_kedro = MyKedro()

In [2]:
from claim_modelling_kedro.pipelines.utils.dataframes import (
    load_metrics_table_from_mlflow,
    load_predictions_and_target_from_mlflow,
    load_metrics_cv_stats_from_mlflow
)
from claim_modelling_kedro.pipelines.utils.datasets import (
    get_partition,
    get_mlflow_run_id_for_partition,
)
from claim_modelling_kedro.experiments.experiment import create_experiment_run, default_run_name_from_run_no, get_run_mlflow_id

In [3]:
# Widen the pandas display limits so the wide run/metric tables below are not truncated.
import pandas as pd
pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 100)  # e.g. the maximum number of columns shown
pd.set_option("display.max_colwidth", None)  # full column width (no truncation of long run names)


In [2]:
# NOTE(review): presumably (re)loads the Kedro project/session — confirm in utils.my_kedro.
my_kedro.reload()

In [4]:
# Run "init" through the Kedro helper (presumably a pipeline name, with log output
# limited to ERROR — TODO confirm against MyKedro.run).
my_kedro.run("init", level="ERROR")
# NOTE(review): `catalog` is not defined in any visible cell — presumably it is injected
# into the notebook namespace by the Kedro helper; otherwise this raises NameError on a
# fresh kernel.
config = catalog.load("config")

In [11]:
def aggregate_cv_stats_dict(cv_stats_dict: dict) -> pd.DataFrame:
    """Combine per-run cross-validation statistics into one table with a row per run.

    Args:
        cv_stats_dict: Mapping ``run_name -> stats``. ``stats`` is either a
            DataFrame indexed by metric name with one column per statistic
            (e.g. ``cv_mean`` and ``cv_std``), or ``None`` when no statistics
            are available for that run.

    Returns:
        DataFrame indexed by ``RUN_NAME``. A run with statistics gets one
        ``<metric>_<stat>`` column per value and ``finished=True``; a run mapped
        to ``None`` gets ``finished=False`` and NaN in every metric column.
        An empty mapping yields an empty frame that still has the ``RUN_NAME``
        index and a ``finished`` column.
    """
    if not cv_stats_dict:
        # pd.concat raises ValueError on an empty list, so return an empty but
        # correctly shaped table instead of failing.
        return pd.DataFrame(columns=["finished"], index=pd.Index([], name="RUN_NAME"))

    all_rows = []

    for run_name, df in cv_stats_dict.items():
        if df is not None:
            # Flatten the (metric, stat) pairs into a single row of "<metric>_<stat>" columns.
            row = df.stack().to_frame().T
            row.columns = [f"{metric}_{stat}" for metric, stat in row.columns]
            row["RUN_NAME"] = run_name
            row["finished"] = True
        else:
            # No statistics for this run: keep a placeholder row so the run stays visible.
            row = pd.DataFrame([{"RUN_NAME": run_name, "finished": False}])
        all_rows.append(row)

    # Columns missing from a placeholder row are filled with NaN by concat.
    df_combined = pd.concat(all_rows, ignore_index=True).set_index("RUN_NAME")
    return df_combined

## Dummy mean regressor

In [11]:
# Experiment and run names of the dummy mean-regressor baseline.
dummy_exp_name = "sev_001_dummy_mean_regressor"
dummy_run_name = "dummy_mean"
# Look up the MLflow run id for that experiment/run name pair.
dummy_run_id = get_run_mlflow_id(dummy_exp_name, dummy_run_name)

In [32]:
# Load the cross-validation metric statistics (cv_mean / cv_std per metric) of the dummy run
# for the "test" and "sample_train_pure" datasets.
test_dummy_cv_stats = load_metrics_cv_stats_from_mlflow(dataset="test", mlflow_run_id=dummy_run_id)
train_dummy_cv_stats = load_metrics_cv_stats_from_mlflow(dataset="sample_train_pure", mlflow_run_id=dummy_run_id)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [30]:
# Test-set CV statistics of the dummy baseline, rounded to 3 decimals for display.
test_dummy_cv_stats.round(3)

Unnamed: 0_level_0,cv_mean,cv_std
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
test_nwMGD,1.493,0.046
test_nwMAE,43.077,0.583
test_nwRMSE,9022.634,678.876
test_nwR2,-0.0,0.0
test_nwMBD,-1.502,50.147
test_nwSC,-0.0,0.0
test_nwABC,0.0,0.0
test_nwCCI,0.024,0.008
test_nwCOI,0.017,0.01
test_nwCUI,0.006,0.007


In [33]:
# Training-sample CV statistics of the dummy baseline, rounded to 3 decimals for display.
train_dummy_cv_stats.round(3)

Unnamed: 0_level_0,cv_mean,cv_std
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
sample_train_pure_nwMGD,1.495,0.007
sample_train_pure_nwMAE,43.103,0.161
sample_train_pure_nwRMSE,9001.243,218.032
sample_train_pure_nwR2,0.0,0.0
sample_train_pure_nwMBD,0.0,0.0
sample_train_pure_nwSC,-0.0,0.0
sample_train_pure_nwABC,0.0,0.0
sample_train_pure_nwCCI,0.016,0.002
sample_train_pure_nwCOI,0.012,0.004
sample_train_pure_nwCUI,0.005,0.004


## Statsmodels GLM

### Wuthrich baseline

In [37]:
# Experiment and run names of the Wuthrich baseline GLM.
glm_de_exp_name = "sev_002_statsmodels_glm_de"
baseline_run_name = "wuthrich_baseline"
baseline_run_id = get_run_mlflow_id(glm_de_exp_name, baseline_run_name)
# CV metric statistics of the baseline run; kept in dedicated variables because later
# cells reuse the generic `test_cv_stats` / `train_cv_stats` names for other runs.
test_wuthrich_cv_stats = load_metrics_cv_stats_from_mlflow(dataset="test", mlflow_run_id=baseline_run_id)
train_wuthrich_cv_stats = load_metrics_cv_stats_from_mlflow(dataset="sample_train_pure", mlflow_run_id=baseline_run_id)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [38]:
# Test-set CV statistics of the Wuthrich baseline, rounded to 3 decimals for display.
test_wuthrich_cv_stats.round(3)

Unnamed: 0_level_0,cv_mean,cv_std
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
test_nwMGD,188.97,79.295
test_nwMAE,61.502,4.257
test_nwRMSE,35095.564,28735.109
test_nwR2,-23.844,41.886
test_nwMBD,-1732.854,560.853
test_nwSC,-0.001,0.015
test_nwABC,0.319,0.028
test_nwCCI,0.32,0.024
test_nwCOI,0.0,0.0
test_nwCUI,0.32,0.024


In [39]:
# Training-sample CV statistics of the Wuthrich baseline, rounded to 3 decimals for display.
train_wuthrich_cv_stats.round(3)

Unnamed: 0_level_0,cv_mean,cv_std
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
sample_train_pure_nwMGD,209.05,66.377
sample_train_pure_nwMAE,61.844,11.877
sample_train_pure_nwRMSE,71853.379,122585.145
sample_train_pure_nwR2,-209.522,462.541
sample_train_pure_nwMBD,-1972.623,1649.688
sample_train_pure_nwSC,0.011,0.01
sample_train_pure_nwABC,0.247,0.045
sample_train_pure_nwCCI,0.263,0.064
sample_train_pure_nwCOI,0.0,0.0
sample_train_pure_nwCUI,0.263,0.064


In [48]:
glm_de_exp_name = "sev_002_statsmodels_glm_de"
# NOTE(review): the run name ends with a trailing space — verify it matches the name stored
# in MLflow exactly. The display cell that follows has no execution count or output.
run_name = "wuthrich_standard_scaler_mean "
run_id = get_run_mlflow_id(glm_de_exp_name, run_name)
# `test_cv_stats` / `train_cv_stats` are generic names that later cells overwrite.
test_cv_stats = load_metrics_cv_stats_from_mlflow(dataset="test", mlflow_run_id=run_id)
train_cv_stats = load_metrics_cv_stats_from_mlflow(dataset="sample_train_pure", mlflow_run_id=run_id)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# NOTE(review): this cell has no execution count or output in the saved notebook; it shows
# whatever `test_cv_stats` currently holds, so run it right after the load cell above.
test_cv_stats.round(3)

### Categorical area

In [44]:
# Load the CV metric statistics of the "cat_area_baseline" run from the same GLM experiment.
glm_de_exp_name = "sev_002_statsmodels_glm_de"
run_name = "cat_area_baseline"
run_id = get_run_mlflow_id(glm_de_exp_name, run_name)
# Overwrites the generic `test_cv_stats` / `train_cv_stats` variables set by the previous section.
test_cv_stats = load_metrics_cv_stats_from_mlflow(dataset="test", mlflow_run_id=run_id)
train_cv_stats = load_metrics_cv_stats_from_mlflow(dataset="sample_train_pure", mlflow_run_id=run_id)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [45]:
# Test-set CV statistics of the categorical-area run, rounded to 3 decimals for display.
test_cv_stats.round(3)

Unnamed: 0_level_0,cv_mean,cv_std
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
test_nwMGD,169.77,76.63
test_nwMAE,69.253,12.054
test_nwRMSE,100667.359,96113.575
test_nwR2,-208.537,287.733
test_nwMBD,-2849.614,1698.534
test_nwSC,0.005,0.005
test_nwABC,0.345,0.049
test_nwCCI,0.351,0.044
test_nwCOI,0.0,0.0
test_nwCUI,0.351,0.044


In [47]:
# Wuthrich baseline shown again, presumably for side-by-side comparison with the
# categorical-area results above.
test_wuthrich_cv_stats.round(3)

Unnamed: 0_level_0,cv_mean,cv_std
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
test_nwMGD,188.97,79.295
test_nwMAE,61.502,4.257
test_nwRMSE,35095.564,28735.109
test_nwR2,-23.844,41.886
test_nwMBD,-1732.854,560.853
test_nwSC,-0.001,0.015
test_nwABC,0.319,0.028
test_nwCCI,0.32,0.024
test_nwCOI,0.0,0.0
test_nwCUI,0.32,0.024


### Inverse link + scaling features and target

In [19]:
import itertools
from typing import List, Union
import pandas as pd

# Configuration flags – control which parameter groups are expanded into a grid.
# A group whose flag is False collapses to its single default value below.
gen_outliers_policy = False
gen_scaler = False
gen_intercept_scale = True
gen_trg_divisor = True

# Default values used for parameters whose grid is not generated
default_outliers_policy = 'keep'
default_outliers_upper_bound = ''
default_scaler_method = 'StandardScaler'
default_scaler_mean = True
default_scaler_std = True
default_scaler_range = [-1, 1]
default_intercept_scale = 1
default_trg_divisor = 1

# All candidate values for each parameter
OUTLIERS_POLICY = ['keep', 'drop', 'clip'] if gen_outliers_policy else [default_outliers_policy]
OUTLIERS_UPPER_BOUND = [70000, 100000] if gen_outliers_policy else [default_outliers_upper_bound]

# With gen_scaler disabled, scaling stays enabled and only the default scaler settings are used.
SCALER_ENABLED = [False, True] if gen_scaler else [True]
SCALER_METHOD = ['StandardScaler', 'MinMaxScaler', 'RobustScaler', 'MaxAbsScaler'] if gen_scaler else [default_scaler_method]
SCALER_MEAN = [False, True] if gen_scaler else [default_scaler_mean]
SCALER_STD = [False, True] if gen_scaler else [default_scaler_std]
SCALER_RANGE = [[-1, 1], [0, 1]] if gen_scaler else [default_scaler_range]

INTERCEPT_SCALE = ['mean', 1, 10, 100, 1000, 10000] if gen_intercept_scale else [default_intercept_scale]
TRG_DIVISOR = ['mean', 1, 10, 100, 1000, 10000] if gen_trg_divisor else [default_trg_divisor]

# Text formatted by generate_runs_config_list() into the DESCRIPTION field of every run.
DESCRIPTION_TEMPLATE = """|
    Baseline StatsModels Gamma GLM with the canonical inverse link. It uses Wuthrich feature engineering.
    sample:
      - outliers policy: {outliers_policy}{outliers_bound_clause}
    feature engineering:
      - Wuthrich features
      - no reducing categories
      - one-hot encoding
      - {scaling_desc}
    feature selection:
      - all features
    model:
      - no regularization,
      - {intercept_scale}
      - {trg_divisor}
    calibration:
      - no calibration
"""

def quote(val):
    """Wrap a string in single quotes; return any non-string value unchanged."""
    if not isinstance(val, str):
        return val
    return "'{}'".format(val)

def generate_scaling_description(scaler_enabled, method, mean, std, range_):
    """Build the human-readable scaling line used in a run description.

    Returns "no scaling" when the scaler is disabled. Otherwise returns the
    scaler name, followed in parentheses by the settings that apply to it:
    mean/std for StandardScaler and RobustScaler, range for MinMaxScaler,
    nothing for any other method.
    """
    if not scaler_enabled:
        return "no scaling"

    if method in ('StandardScaler', 'RobustScaler'):
        details = [f"mean={mean}", f"std={std}"]
    elif method == 'MinMaxScaler':
        details = [f"range={range_}"]
    else:
        details = []

    label = f"{method}"
    if not details:
        return label
    return label + " (" + ", ".join(details) + ")"


def generate_runs_config_list(skip_redundant: bool = False):
    """Expand the module-level parameter grids into one configuration row per run.

    The grids are read at call time from the globals ``OUTLIERS_POLICY``,
    ``OUTLIERS_UPPER_BOUND``, ``SCALER_ENABLED``, ``SCALER_METHOD``,
    ``SCALER_MEAN``, ``SCALER_STD``, ``SCALER_RANGE``, ``INTERCEPT_SCALE`` and
    ``TRG_DIVISOR``; the description text comes from ``DESCRIPTION_TEMPLATE``.
    The result therefore depends on which configuration cell was executed last.

    Dimensions that do not apply to a combination collapse to the first value
    of their grid: the upper bound for the ``'keep'`` policy, the scaler method
    when scaling is disabled, mean/std for scalers other than
    StandardScaler/RobustScaler, and the range for scalers other than
    MinMaxScaler.

    Args:
        skip_redundant: With ``False`` (default) the enumeration is unchanged,
            so run numbers and names stay identical to earlier calls: a
            disabled scaler is still crossed with every ``SCALER_MEAN`` x
            ``SCALER_STD`` value, which yields rows that differ only in scaler
            settings (presumably ignored when scaling is off). With ``True``
            those grids are collapsed for a disabled scaler, so each
            "no scaling" setup is emitted exactly once. Note that this
            changes the run numbering.

    Returns:
        pandas DataFrame with one row per run. Columns are the nine parameter
        tags plus ``RUN_NAME`` and ``DESCRIPTION``; every value is converted
        to ``str``.
    """
    tag_dicts = []
    run_id = 0

    for outliers_policy in OUTLIERS_POLICY:
        # For 'keep' only the first bound is used, as a placeholder value.
        upper_bounds = OUTLIERS_UPPER_BOUND if outliers_policy != 'keep' else [OUTLIERS_UPPER_BOUND[0]]
        for outliers_upper_bound in upper_bounds:
            for scaler_enabled in SCALER_ENABLED:
                scaler_methods = SCALER_METHOD if scaler_enabled else [SCALER_METHOD[0]]
                for scaler_method in scaler_methods:
                    uses_mean_std = scaler_method in ['StandardScaler', 'RobustScaler']
                    uses_range = scaler_method == 'MinMaxScaler'
                    if skip_redundant and not scaler_enabled:
                        # Scaler settings are irrelevant without scaling: emit one row only.
                        uses_mean_std = False
                        uses_range = False

                    means = SCALER_MEAN if uses_mean_std else [SCALER_MEAN[0]]
                    stds = SCALER_STD if uses_mean_std else [SCALER_STD[0]]
                    ranges = SCALER_RANGE if uses_range else [SCALER_RANGE[0]]

                    # Same iteration order as nesting these five loops (rightmost varies fastest).
                    grid = itertools.product(means, stds, ranges, INTERCEPT_SCALE, TRG_DIVISOR)
                    for mean, std, scaler_range, intercept_scale, trg_divisor in grid:
                        run_id += 1
                        run_id_str = f"{run_id:04d}"

                        tag_dict = {
                            "OUTLIERS_POLICY": outliers_policy,
                            "OUTLIERS_UPPER_BOUND": outliers_upper_bound,
                            "SCALER_ENABLED": scaler_enabled,
                            "SCALER_METHOD": scaler_method,
                            "SCALER_MEAN": mean,
                            "SCALER_STD": std,
                            "SCALER_RANGE": scaler_range,
                            "INTERCEPT_SCALE": intercept_scale,
                            "TRG_DIVISOR": trg_divisor
                        }

                        outliers_bound_clause = (
                            f" [with upper bound {outliers_upper_bound} if {outliers_policy} is not 'keep']"
                            if outliers_policy != 'keep' else ""
                        )
                        scaling_desc = generate_scaling_description(
                            scaler_enabled, scaler_method, mean, std, scaler_range
                        )
                        description = DESCRIPTION_TEMPLATE.format(
                            outliers_policy=outliers_policy,
                            outliers_bound_clause=outliers_bound_clause,
                            scaling_desc=scaling_desc,
                            intercept_scale=intercept_scale,
                            trg_divisor=trg_divisor
                        )

                        # The sequential number keeps names unique even when the
                        # remaining name parts coincide.
                        run_name = f"run_{run_id_str}_{outliers_policy}"
                        if scaler_enabled:
                            run_name += f"_{scaler_method}"
                        run_name += f"_intercept_{intercept_scale}_trg_div_{trg_divisor}"

                        tag_dict["RUN_NAME"] = run_name
                        tag_dict["DESCRIPTION"] = description
                        tag_dicts.append(tag_dict)

    df = pd.DataFrame(tag_dicts)

    # Convert every value (booleans, numbers, lists) to its string form.
    for col in df.columns:
        df[col] = df[col].map(str)
    return df


# Build the run table from the grids defined at the top of this cell.
# NOTE: `df` is reused by the following cells (experiment creation and MLflow lookups).
df = generate_runs_config_list()
# Display view: drop the long description text and index the rows by run name.
df_for_table = df.drop(columns=["DESCRIPTION"]).set_index("RUN_NAME")
df_for_table

Unnamed: 0_level_0,OUTLIERS_POLICY,OUTLIERS_UPPER_BOUND,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,INTERCEPT_SCALE,TRG_DIVISOR
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
run_0001_keep_StandardScaler_intercept_mean_trg_div_mean,keep,,True,StandardScaler,True,True,"[-1, 1]",mean,mean
run_0002_keep_StandardScaler_intercept_mean_trg_div_1,keep,,True,StandardScaler,True,True,"[-1, 1]",mean,1
run_0003_keep_StandardScaler_intercept_mean_trg_div_10,keep,,True,StandardScaler,True,True,"[-1, 1]",mean,10
run_0004_keep_StandardScaler_intercept_mean_trg_div_100,keep,,True,StandardScaler,True,True,"[-1, 1]",mean,100
run_0005_keep_StandardScaler_intercept_mean_trg_div_1000,keep,,True,StandardScaler,True,True,"[-1, 1]",mean,1000
run_0006_keep_StandardScaler_intercept_mean_trg_div_10000,keep,,True,StandardScaler,True,True,"[-1, 1]",mean,10000
run_0007_keep_StandardScaler_intercept_1_trg_div_mean,keep,,True,StandardScaler,True,True,"[-1, 1]",1,mean
run_0008_keep_StandardScaler_intercept_1_trg_div_1,keep,,True,StandardScaler,True,True,"[-1, 1]",1,1
run_0009_keep_StandardScaler_intercept_1_trg_div_10,keep,,True,StandardScaler,True,True,"[-1, 1]",1,10
run_0010_keep_StandardScaler_intercept_1_trg_div_100,keep,,True,StandardScaler,True,True,"[-1, 1]",1,100


In [21]:
experiment_name = "sev_003_statsmodels_glm_inverse_scaled_trg_and_intercept"

# Call create_experiment_run once for every generated configuration row.
for _, row in df.iterrows():
    run_name = row["RUN_NAME"]

    # Template parameters: every column of the row plus the two required fields.
    template_parameters = {
        **row.to_dict(),
        "run_name": run_name,
        "experiment_name": experiment_name,
    }

    create_experiment_run(
        experiment_name=experiment_name,
        run_name=run_name,
        template_parameters=template_parameters,
    )

In [6]:
# NOTE: relies on `df` from the most recently executed run-generation cell.
experiment_name = "sev_003_statsmodels_glm_inverse_scaled_trg_and_intercept"
# MLflow run id for every generated run name.
run_ids = {run_name: get_run_mlflow_id(experiment_name, run_name) for run_name in df.RUN_NAME}
# CV metric statistics per run. With raise_on_failure=False a run whose statistics cannot be
# loaded presumably yields None (aggregate_cv_stats_dict reports None as finished=False), and
# time_limit=1 presumably bounds each load — TODO confirm against load_metrics_cv_stats_from_mlflow.
test_cv_stats_dct = {run_name: load_metrics_cv_stats_from_mlflow(dataset="test", mlflow_run_id=run_id, time_limit=1, raise_on_failure=False) for run_name, run_id in run_ids.items()}
train_cv_stats_dct = {run_name: load_metrics_cv_stats_from_mlflow(dataset="sample_train_pure", mlflow_run_id=run_id, time_limit=1, raise_on_failure=False) for run_name, run_id in run_ids.items()}

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [10]:
# Inspect the column labels of one run's statistics table.
test_cv_stats_dct["run_0001_keep_StandardScaler_intercept_mean_trg_div_mean"].columns

[1;35mIndex[0m[1m([0m[1m[[0m[32m'cv_mean'[0m, [32m'cv_std'[0m[1m][0m, [33mdtype[0m=[32m'object'[0m[1m)[0m

In [13]:
# One row per run: completion flag plus mean/std of the MGD, MBD and NCCGI test metrics.
aggregate_cv_stats_dict(test_cv_stats_dct).loc[:,["finished", "test_nwMGD_cv_mean", "test_nwMGD_cv_std", "test_nwMBD_cv_mean", "test_nwMBD_cv_std", "test_nwNCCGI_cv_mean", "test_nwNCCGI_cv_std"]]

Unnamed: 0_level_0,finished,test_nwMGD_cv_mean,test_nwMGD_cv_std,test_nwMBD_cv_mean,test_nwMBD_cv_std,test_nwNCCGI_cv_mean,test_nwNCCGI_cv_std
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
run_0001_keep_StandardScaler_intercept_mean_trg_div_mean,True,190.823371,81.95998,-1747.965243,582.332163,0.013571,0.057921
run_0002_keep_StandardScaler_intercept_mean_trg_div_1,True,190.823371,81.95998,-1747.965242,582.332162,0.013571,0.057921
run_0003_keep_StandardScaler_intercept_mean_trg_div_10,True,190.821052,81.960767,-1757.360839,576.040833,0.013296,0.057736
run_0004_keep_StandardScaler_intercept_mean_trg_div_100,True,190.823371,81.95998,-1747.965242,582.332162,0.013571,0.057921
run_0005_keep_StandardScaler_intercept_mean_trg_div_1000,True,190.823371,81.95998,-1747.965242,582.332162,0.013571,0.057921
run_0006_keep_StandardScaler_intercept_mean_trg_div_10000,True,190.823371,81.95998,-1747.965242,582.332162,0.013571,0.057921
run_0007_keep_StandardScaler_intercept_1_trg_div_mean,False,,,,,,
run_0008_keep_StandardScaler_intercept_1_trg_div_1,False,,,,,,
run_0009_keep_StandardScaler_intercept_1_trg_div_10,False,,,,,,
run_0010_keep_StandardScaler_intercept_1_trg_div_100,False,,,,,,


### Loglink + scaling

In [33]:
# Configuration flags – control which parameter groups are expanded into a grid.
# NOTE: this cell overwrites the globals read by generate_runs_config_list(), so the
# generated table depends on which configuration cell ran last.
gen_outliers_policy = False
gen_scaler = True
gen_intercept_scale = True
gen_trg_divisor = True

# Default values used for parameters whose grid is not generated
default_outliers_policy = 'keep'
default_outliers_upper_bound = ''
default_scaler_method = 'StandardScaler'
default_scaler_mean = True
default_scaler_std = True
default_scaler_range = [-1, 1]
default_intercept_scale = 1
default_trg_divisor = 1

# All candidate values for each parameter
OUTLIERS_POLICY = ['keep', 'drop', 'clip'] if gen_outliers_policy else [default_outliers_policy]
OUTLIERS_UPPER_BOUND = [70000, 100000] if gen_outliers_policy else [default_outliers_upper_bound]

SCALER_ENABLED = [False, True] if gen_scaler else [True]
SCALER_METHOD = ['StandardScaler', 'MinMaxScaler', 'RobustScaler', 'MaxAbsScaler'] if gen_scaler else [default_scaler_method]
SCALER_MEAN = [False, True] if gen_scaler else [default_scaler_mean]
SCALER_STD = [False, True] if gen_scaler else [default_scaler_std]
SCALER_RANGE = [[-1, 1], [0, 1]] if gen_scaler else [default_scaler_range]

INTERCEPT_SCALE = [1, 10] if gen_intercept_scale else [default_intercept_scale]
TRG_DIVISOR = [1, 10, 100] if gen_trg_divisor else [default_trg_divisor]

# Text formatted by generate_runs_config_list() into the DESCRIPTION field of every run.
DESCRIPTION_TEMPLATE = """|
    Baseline StatsModels Gamma GLM with the log link. It uses Wuthrich feature engineering.
    sample:
      - outliers policy: {outliers_policy}{outliers_bound_clause}
    feature engineering:
      - Wuthrich features
      - no reducing categories
      - one-hot encoding
      - {scaling_desc}
    feature selection:
      - all features
    model:
      - no regularization,
      - {intercept_scale}
      - {trg_divisor}
    calibration:
      - no calibration
"""

# NOTE(review): with gen_scaler=True the scaler-disabled setup is still crossed with every
# SCALER_MEAN x SCALER_STD value, so several "no scaling" rows differ only in those
# columns — check whether these duplicates are intended.
df = generate_runs_config_list()
# Display view: drop the long description text and index the rows by run name.
df_for_table = df.drop(columns=["DESCRIPTION"]).set_index("RUN_NAME")
df_for_table

Unnamed: 0_level_0,OUTLIERS_POLICY,OUTLIERS_UPPER_BOUND,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,INTERCEPT_SCALE,TRG_DIVISOR
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
run_0001_keep_intercept_1_trg_div_1,keep,,False,StandardScaler,False,False,"[-1, 1]",1,1
run_0002_keep_intercept_1_trg_div_10,keep,,False,StandardScaler,False,False,"[-1, 1]",1,10
run_0003_keep_intercept_1_trg_div_100,keep,,False,StandardScaler,False,False,"[-1, 1]",1,100
run_0004_keep_intercept_10_trg_div_1,keep,,False,StandardScaler,False,False,"[-1, 1]",10,1
run_0005_keep_intercept_10_trg_div_10,keep,,False,StandardScaler,False,False,"[-1, 1]",10,10
...,...,...,...,...,...,...,...,...,...
run_0086_keep_MaxAbsScaler_intercept_1_trg_div_10,keep,,True,MaxAbsScaler,False,False,"[-1, 1]",1,10
run_0087_keep_MaxAbsScaler_intercept_1_trg_div_100,keep,,True,MaxAbsScaler,False,False,"[-1, 1]",1,100
run_0088_keep_MaxAbsScaler_intercept_10_trg_div_1,keep,,True,MaxAbsScaler,False,False,"[-1, 1]",10,1
run_0089_keep_MaxAbsScaler_intercept_10_trg_div_10,keep,,True,MaxAbsScaler,False,False,"[-1, 1]",10,10


In [34]:
experiment_name = "sev_004_statsmodels_glm_log_link_scaled_trg_and_intercept"

# Call create_experiment_run once for every generated configuration row.
for _, row in df.iterrows():
    run_name = row["RUN_NAME"]

    # Template parameters: every column of the row plus the two required fields.
    template_parameters = {
        **row.to_dict(),
        "run_name": run_name,
        "experiment_name": experiment_name,
    }

    create_experiment_run(
        experiment_name=experiment_name,
        run_name=run_name,
        template_parameters=template_parameters,
    )

## Sklearn

### Loglink + scaling

In [26]:
# Configuration flags – control which parameter groups are expanded into a grid.
# NOTE(review): with all four flags False the grids below collapse to a single run, yet the
# stored output of this cell lists several intercept / target-divisor variants — the flags
# were presumably changed after the last execution; confirm the intended values before re-running.
gen_outliers_policy = False
gen_scaler = False
gen_intercept_scale = False
gen_trg_divisor = False

# Default values used for parameters whose grid is not generated
default_outliers_policy = 'keep'
default_outliers_upper_bound = ''
default_scaler_method = 'StandardScaler'
default_scaler_mean = True
default_scaler_std = True
default_scaler_range = [-1, 1]
default_intercept_scale = 1
default_trg_divisor = 1

# All candidate values for each parameter
OUTLIERS_POLICY = ['keep', 'drop', 'clip'] if gen_outliers_policy else [default_outliers_policy]
OUTLIERS_UPPER_BOUND = [70000, 100000] if gen_outliers_policy else [default_outliers_upper_bound]

SCALER_ENABLED = [False, True] if gen_scaler else [True]
SCALER_METHOD = ['StandardScaler', 'MinMaxScaler', 'RobustScaler', 'MaxAbsScaler'] if gen_scaler else [default_scaler_method]
SCALER_MEAN = [False, True] if gen_scaler else [default_scaler_mean]
SCALER_STD = [False, True] if gen_scaler else [default_scaler_std]
SCALER_RANGE = [[-1, 1], [0, 1]] if gen_scaler else [default_scaler_range]

INTERCEPT_SCALE = [0.001, 0.1, 1, 10] if gen_intercept_scale else [default_intercept_scale]
TRG_DIVISOR = [1, 2, 10, 20, 50, 100] if gen_trg_divisor else [default_trg_divisor]

# Text formatted by generate_runs_config_list() into the DESCRIPTION field of every run.
DESCRIPTION_TEMPLATE = """|
    Baseline Sklearn Gamma GLM with the log link. It uses Wuthrich feature engineering.
    sample:
      - outliers policy: {outliers_policy}{outliers_bound_clause}
    feature engineering:
      - Wuthrich features
      - no reducing categories
      - one-hot encoding
      - {scaling_desc}
    feature selection:
      - all features
    model:
      - no regularization,
      - {intercept_scale}
      - {trg_divisor}
    calibration:
      - no calibration
"""

# Rebuild the run table from the grids above; `df` is reused by the following cells.
df = generate_runs_config_list()
# Display view: drop the long description text and index the rows by run name.
df_for_table = df.drop(columns=["DESCRIPTION"]).set_index("RUN_NAME")
df_for_table

Unnamed: 0_level_0,OUTLIERS_POLICY,OUTLIERS_UPPER_BOUND,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,INTERCEPT_SCALE,TRG_DIVISOR
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
run_0001_keep_StandardScaler_intercept_0.001_trg_div_1,keep,,True,StandardScaler,True,True,"[-1, 1]",0.001,1
run_0002_keep_StandardScaler_intercept_0.001_trg_div_2,keep,,True,StandardScaler,True,True,"[-1, 1]",0.001,2
run_0003_keep_StandardScaler_intercept_0.001_trg_div_10,keep,,True,StandardScaler,True,True,"[-1, 1]",0.001,10
run_0004_keep_StandardScaler_intercept_0.001_trg_div_20,keep,,True,StandardScaler,True,True,"[-1, 1]",0.001,20
run_0005_keep_StandardScaler_intercept_0.001_trg_div_50,keep,,True,StandardScaler,True,True,"[-1, 1]",0.001,50
run_0006_keep_StandardScaler_intercept_0.001_trg_div_100,keep,,True,StandardScaler,True,True,"[-1, 1]",0.001,100
run_0007_keep_StandardScaler_intercept_0.1_trg_div_1,keep,,True,StandardScaler,True,True,"[-1, 1]",0.1,1
run_0008_keep_StandardScaler_intercept_0.1_trg_div_2,keep,,True,StandardScaler,True,True,"[-1, 1]",0.1,2
run_0009_keep_StandardScaler_intercept_0.1_trg_div_10,keep,,True,StandardScaler,True,True,"[-1, 1]",0.1,10
run_0010_keep_StandardScaler_intercept_0.1_trg_div_20,keep,,True,StandardScaler,True,True,"[-1, 1]",0.1,20


In [27]:
experiment_name = "sev_005_sklearn_glm_log_link_scaled_trg_and_intercept"

# Call create_experiment_run once for every generated configuration row.
for _, row in df.iterrows():
    run_name = row["RUN_NAME"]

    # Template parameters: every column of the row plus the two required fields.
    template_parameters = {
        **row.to_dict(),
        "run_name": run_name,
        "experiment_name": experiment_name,
    }

    create_experiment_run(
        experiment_name=experiment_name,
        run_name=run_name,
        template_parameters=template_parameters,
    )

In [44]:
# NOTE: relies on `df` from the most recently executed run-generation cell.
experiment_name = "sev_005_sklearn_glm_log_link_scaled_trg_and_intercept"
# MLflow run id for every generated run name.
run_ids = {run_name: get_run_mlflow_id(experiment_name, run_name) for run_name in df.RUN_NAME}
# CV metric statistics per run. With raise_on_failure=False a run whose statistics cannot be
# loaded presumably yields None (aggregate_cv_stats_dict reports None as finished=False), and
# time_limit=1 presumably bounds each load — TODO confirm against load_metrics_cv_stats_from_mlflow.
test_cv_stats_dct = {run_name: load_metrics_cv_stats_from_mlflow(dataset="test", mlflow_run_id=run_id, time_limit=1, raise_on_failure=False) for run_name, run_id in run_ids.items()}
train_cv_stats_dct = {run_name: load_metrics_cv_stats_from_mlflow(dataset="sample_train_pure", mlflow_run_id=run_id, time_limit=1, raise_on_failure=False) for run_name, run_id in run_ids.items()}

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# One row per run with the MGD, MBD and NCCGI test metrics, sorted ascending by mean NCCGI.
# NOTE(review): this cell has no execution count or output in the saved notebook.
aggregate_cv_stats_dict(test_cv_stats_dct).loc[:,["finished", "test_nwMGD_cv_mean", "test_nwMGD_cv_std", "test_nwMBD_cv_mean", "test_nwMBD_cv_std", "test_nwNCCGI_cv_mean", "test_nwNCCGI_cv_std"]].sort_values("test_nwNCCGI_cv_mean")

In [None]:
# Same summary for the training sample, sorted ascending by mean NCCGI.
# NOTE(review): this cell has no execution count or output in the saved notebook.
aggregate_cv_stats_dict(train_cv_stats_dct).loc[:,["finished", "sample_train_pure_nwMGD_cv_mean", "sample_train_pure_nwMGD_cv_std", "sample_train_pure_nwMBD_cv_mean", "sample_train_pure_nwMBD_cv_std", "sample_train_pure_nwNCCGI_cv_mean", "sample_train_pure_nwNCCGI_cv_std"]].sort_values("sample_train_pure_nwNCCGI_cv_mean")

### Loglink + scalers + outliers

In [59]:
# Sweep switches – decide which parameter groups are expanded into several values
gen_outliers_policy = True
gen_scaler = True
gen_intercept_scale = False
gen_trg_divisor = False

# Fallback values used for a parameter group that is not swept
default_outliers_policy = 'keep'
default_outliers_upper_bound = ''
default_scaler_method = 'StandardScaler'
default_scaler_mean = True
default_scaler_std = True
default_scaler_range = [-1, 1]
default_intercept_scale = 1
default_trg_divisor = 1

# Candidate values per parameter: the full list when the group is swept,
# otherwise a one-element list holding the default.
if gen_outliers_policy:
    OUTLIERS_POLICY = ['keep', 'drop', 'clip']
    OUTLIERS_UPPER_BOUND = [70000, 100000]
else:
    OUTLIERS_POLICY = [default_outliers_policy]
    OUTLIERS_UPPER_BOUND = [default_outliers_upper_bound]

if gen_scaler:
    SCALER_ENABLED = [False, True]
    SCALER_METHOD = ['StandardScaler', 'MinMaxScaler', 'RobustScaler', 'MaxAbsScaler']
    SCALER_MEAN = [False, True]
    SCALER_STD = [False, True]
    SCALER_RANGE = [[-1, 1], [0, 1]]
else:
    # With the scaler sweep off, scaling stays enabled with the default settings.
    SCALER_ENABLED = [True]
    SCALER_METHOD = [default_scaler_method]
    SCALER_MEAN = [default_scaler_mean]
    SCALER_STD = [default_scaler_std]
    SCALER_RANGE = [default_scaler_range]

if gen_intercept_scale:
    INTERCEPT_SCALE = [0.001, 0.1, 1, 10]
else:
    INTERCEPT_SCALE = [default_intercept_scale]

if gen_trg_divisor:
    TRG_DIVISOR = [1, 2, 10, 20, 50, 100]
else:
    TRG_DIVISOR = [default_trg_divisor]

# Run description template; the {placeholders} are filled in per run via str.format.
DESCRIPTION_TEMPLATE = """|
    Baseline Sklearn Gamma GLM with the log link. It uses Wuthrich feature engineering.
    sample:
      - outliers policy: {outliers_policy}{outliers_bound_clause}
    feature engineering:
      - Wuthrich features
      - no reducing categories
      - one-hot encoding
      - {scaling_desc}
    feature selection:
      - all features
    model:
      - no regularization,
      - {intercept_scale}
      - {trg_divisor}
    calibration:
      - no calibration
"""

# Build the run grid; the display table is indexed by run name and leaves out the
# multi-line DESCRIPTION column.
df = generate_runs_config_list()
df_for_table = df.set_index("RUN_NAME").drop(columns=["DESCRIPTION"])
df_for_table

Unnamed: 0_level_0,OUTLIERS_POLICY,OUTLIERS_UPPER_BOUND,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,INTERCEPT_SCALE,TRG_DIVISOR
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
run_0001_keep_intercept_1_trg_div_1,keep,70000,False,StandardScaler,False,False,"[-1, 1]",1,1
run_0002_keep_intercept_1_trg_div_1,keep,70000,False,StandardScaler,False,True,"[-1, 1]",1,1
run_0003_keep_intercept_1_trg_div_1,keep,70000,False,StandardScaler,True,False,"[-1, 1]",1,1
run_0004_keep_intercept_1_trg_div_1,keep,70000,False,StandardScaler,True,True,"[-1, 1]",1,1
run_0005_keep_StandardScaler_intercept_1_trg_div_1,keep,70000,True,StandardScaler,False,False,"[-1, 1]",1,1
run_0006_keep_StandardScaler_intercept_1_trg_div_1,keep,70000,True,StandardScaler,False,True,"[-1, 1]",1,1
run_0007_keep_StandardScaler_intercept_1_trg_div_1,keep,70000,True,StandardScaler,True,False,"[-1, 1]",1,1
run_0008_keep_StandardScaler_intercept_1_trg_div_1,keep,70000,True,StandardScaler,True,True,"[-1, 1]",1,1
run_0009_keep_MinMaxScaler_intercept_1_trg_div_1,keep,70000,True,MinMaxScaler,False,False,"[-1, 1]",1,1
run_0010_keep_MinMaxScaler_intercept_1_trg_div_1,keep,70000,True,MinMaxScaler,False,False,"[0, 1]",1,1


In [38]:
experiment_name = "sev_006_sklearn_glm_log_link_scalers"

# Create one experiment run per row of the generated configuration table.
for _, row in df.iterrows():
    run_name = row["RUN_NAME"]

    # Template parameters: every column of the row plus the required identifier fields.
    template_parameters = {
        **row.to_dict(),
        "run_name": run_name,
        "experiment_name": experiment_name,
    }

    create_experiment_run(
        experiment_name=experiment_name,
        run_name=run_name,
        template_parameters=template_parameters,
    )

In [60]:
experiment_name = "sev_006_sklearn_glm_log_link_scalers"

# MLflow run id for each generated run name.
run_ids = {
    name: get_run_mlflow_id(experiment_name, name)
    for name in df["RUN_NAME"]
}

# CV statistics per run on the test set.
# NOTE(review): with raise_on_failure=False a failed load presumably yields None, which
# aggregate_cv_stats_dict reports as finished=False - confirm against the loader.
test_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="test",
        mlflow_run_id=mlflow_id,
        time_limit=1,
        raise_on_failure=False,
    )
    for name, mlflow_id in run_ids.items()
}

# Same statistics for the "sample_train_pure" dataset.
train_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="sample_train_pure",
        mlflow_run_id=mlflow_id,
        time_limit=1,
        raise_on_failure=False,
    )
    for name, mlflow_id in run_ids.items()
}

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [66]:
# Selected test-set columns for every run, highest "test_nwNCCGI_cv_mean" first.
res = (
    aggregate_cv_stats_dict(test_cv_stats_dct)
    .loc[
        :,
        [
            "finished",
            "test_nwMGD_cv_mean",
            "test_nwMGD_cv_std",
            "test_nwMBD_cv_mean",
            "test_nwMBD_cv_std",
            "test_nwNCCGI_cv_mean",
            "test_nwNCCGI_cv_std",
        ],
    ]
    .sort_values("test_nwNCCGI_cv_mean", ascending=False)
)

In [67]:
# Put each run's configuration columns next to its test metrics (joined on the RUN_NAME
# index) and sort by "test_nwNCCGI_cv_mean", highest first.
pd.merge(
    res,
    df_for_table,
    left_index=True,
    right_index=True,
).sort_values("test_nwNCCGI_cv_mean", ascending=False)

Unnamed: 0_level_0,finished,test_nwMGD_cv_mean,test_nwMGD_cv_std,test_nwMBD_cv_mean,test_nwMBD_cv_std,test_nwNCCGI_cv_mean,test_nwNCCGI_cv_std,OUTLIERS_POLICY,OUTLIERS_UPPER_BOUND,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,INTERCEPT_SCALE,TRG_DIVISOR
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
run_0026_drop_RobustScaler_intercept_1_trg_div_1,True,1.531744,0.054839,363.973111,48.986387,0.096052,0.037976,drop,70000,True,RobustScaler,False,False,"[-1, 1]",1,1
run_0020_drop_StandardScaler_intercept_1_trg_div_1,True,1.531744,0.054839,363.973111,48.986387,0.096052,0.037976,drop,70000,True,StandardScaler,False,False,"[-1, 1]",1,1
run_0019_drop_intercept_1_trg_div_1,True,1.531744,0.054839,363.973111,48.986387,0.096052,0.037976,drop,70000,False,StandardScaler,True,True,"[-1, 1]",1,1
run_0018_drop_intercept_1_trg_div_1,True,1.531744,0.054839,363.973111,48.986387,0.096052,0.037976,drop,70000,False,StandardScaler,True,False,"[-1, 1]",1,1
run_0017_drop_intercept_1_trg_div_1,True,1.531744,0.054839,363.973111,48.986387,0.096052,0.037976,drop,70000,False,StandardScaler,False,True,"[-1, 1]",1,1
run_0016_drop_intercept_1_trg_div_1,True,1.531744,0.054839,363.973111,48.986387,0.096052,0.037976,drop,70000,False,StandardScaler,False,False,"[-1, 1]",1,1
run_0022_drop_StandardScaler_intercept_1_trg_div_1,True,1.533379,0.055556,364.583604,46.365759,0.093343,0.035786,drop,70000,True,StandardScaler,True,False,"[-1, 1]",1,1
run_0028_drop_RobustScaler_intercept_1_trg_div_1,True,1.534991,0.059023,363.325105,46.849081,0.090617,0.034239,drop,70000,True,RobustScaler,True,False,"[-1, 1]",1,1
run_0021_drop_StandardScaler_intercept_1_trg_div_1,True,1.534993,0.059067,364.127676,48.00502,0.0901,0.035661,drop,70000,True,StandardScaler,False,True,"[-1, 1]",1,1
run_0027_drop_RobustScaler_intercept_1_trg_div_1,True,1.535042,0.05917,363.658807,47.873988,0.089865,0.035552,drop,70000,True,RobustScaler,False,True,"[-1, 1]",1,1


In [65]:
# Selected training-sample columns for every run, highest
# "sample_train_pure_nwNCCGI_cv_mean" first.
(
    aggregate_cv_stats_dict(train_cv_stats_dct)
    .loc[
        :,
        [
            "finished",
            "sample_train_pure_nwMGD_cv_mean",
            "sample_train_pure_nwMGD_cv_std",
            "sample_train_pure_nwMBD_cv_mean",
            "sample_train_pure_nwMBD_cv_std",
            "sample_train_pure_nwNCCGI_cv_mean",
            "sample_train_pure_nwNCCGI_cv_std",
        ],
    ]
    .sort_values("sample_train_pure_nwNCCGI_cv_mean", ascending=False)
)

Unnamed: 0_level_0,finished,sample_train_pure_nwMGD_cv_mean,sample_train_pure_nwMGD_cv_std,sample_train_pure_nwMBD_cv_mean,sample_train_pure_nwMBD_cv_std,sample_train_pure_nwNCCGI_cv_mean,sample_train_pure_nwNCCGI_cv_std
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
run_0057_clip_RobustScaler_intercept_1_trg_div_1,True,1.390647,0.009653,7.32966,2.051468,0.292821,0.008225
run_0072_clip_RobustScaler_intercept_1_trg_div_1,True,1.390647,0.009653,7.32966,2.051468,0.292821,0.008225
run_0012_keep_RobustScaler_intercept_1_trg_div_1,True,1.390647,0.009653,7.32966,2.051468,0.292821,0.008225
run_0060_clip_MaxAbsScaler_intercept_1_trg_div_1,True,1.390642,0.009652,7.54016,2.166925,0.292782,0.008189
run_0075_clip_MaxAbsScaler_intercept_1_trg_div_1,True,1.390642,0.009652,7.54016,2.166925,0.292782,0.008189
run_0015_keep_MaxAbsScaler_intercept_1_trg_div_1,True,1.390642,0.009652,7.54016,2.166925,0.292782,0.008189
run_0051_clip_StandardScaler_intercept_1_trg_div_1,True,1.390702,0.009653,8.065343,1.69271,0.292768,0.008254
run_0006_keep_StandardScaler_intercept_1_trg_div_1,True,1.390702,0.009653,8.065343,1.69271,0.292768,0.008254
run_0066_clip_StandardScaler_intercept_1_trg_div_1,True,1.390702,0.009653,8.065343,1.69271,0.292768,0.008254
run_0053_clip_StandardScaler_intercept_1_trg_div_1,True,1.39064,0.009652,7.512441,2.16412,0.292748,0.008256


### Feature engineering + optionally log-scaled target

In [68]:
# Configuration flags – control which parameter groups are generated
gen_scaler = False
gen_reduce_cat = True
gen_drop = True

# Default values for parameters whose group is not generated
default_scaler_method = 'StandardScaler'
default_scaler_mean = True
default_scaler_std = False
# Defined in this cell so that SCALER_RANGE below does not depend on a variable left in the
# kernel by an earlier section (it would raise NameError on a fresh kernel otherwise).
default_scaler_range = [-1, 1]

default_reduce_categories = False
default_min_frequency = 30
default_drop_reference_cat = False
default_drop_first = False
default_drop_binary = False


# Candidate values per parameter: the full list when the group is generated,
# otherwise a one-element list holding the default.
SCALER_ENABLED = [False, True] if gen_scaler else [True]
SCALER_METHOD = ['StandardScaler', 'MinMaxScaler', 'RobustScaler', 'MaxAbsScaler'] if gen_scaler else [default_scaler_method]
SCALER_MEAN = [False, True] if gen_scaler else [default_scaler_mean]
SCALER_STD = [False, True] if gen_scaler else [default_scaler_std]
SCALER_RANGE = [[-1, 1], [0, 1]] if gen_scaler else [default_scaler_range]

REDUCE_CATEGORIES = [False, True] if gen_reduce_cat else [default_reduce_categories]
# Frequency thresholds tried when category reduction is enabled.
MIN_FREQUENCY_OPTIONS = [10, 15, 20, 25, 30, 40, 50, 75, 100, 150, 200]
DROP_REFERENCE_CAT = [False, True] if gen_drop else [default_drop_reference_cat]
DROP_FIRST = [False, True] if gen_drop else [default_drop_first]
DROP_BINARY = [False, True] if gen_drop else [default_drop_binary]

# Run description template; the {placeholders} are filled in per run via str.format.
DESCRIPTION_TEMPLATE = """|
    Baseline StatsModels Gamma GLM with the canonical inverse link. It uses Wuthrich feature engineering.
    sample:
      - outliers policy: {outliers_policy}
    feature engineering:
      - Wuthrich features
      - reduce categories: {reduce_categories} (min freq: {min_frequency})
      - one-hot encoding
      - {scaling_desc}
    feature selection:
      - all features
    model:
      - no regularization
    calibration:
      - no calibration
"""
def generate_runs_config_list_without_outliers():
    """Build the table of run configurations for the sweep without outlier options.

    Walks the module-level option lists (``REDUCE_CATEGORIES``, ``MIN_FREQUENCY_OPTIONS``,
    ``DROP_REFERENCE_CAT``, ``DROP_FIRST``, ``DROP_BINARY`` and the ``SCALER_*`` lists)
    and emits one row per combination. Runs are numbered from 1 in iteration order and
    named ``run_NNNN`` (zero-padded to four digits).

    Returns:
        pd.DataFrame: one row per run with the parameter columns followed by ``RUN_NAME``
        and ``DESCRIPTION``. Every value that is not already a string is converted
        with ``str``.
    """

    def iter_combinations():
        # Yields one flat tuple per run, in the order the runs are numbered.
        for reduce_cat in REDUCE_CATEGORIES:
            # The frequency threshold is only varied when category reduction is on.
            freq_options = MIN_FREQUENCY_OPTIONS if reduce_cat else [default_min_frequency]
            for min_freq in freq_options:
                for drop_flags in itertools.product(DROP_REFERENCE_CAT, DROP_FIRST, DROP_BINARY):
                    for enabled in SCALER_ENABLED:
                        # A disabled scaler still records the first method as a placeholder.
                        method_options = SCALER_METHOD if enabled else [SCALER_METHOD[0]]
                        for method in method_options:
                            # Mean/std switches are varied only for these two scalers,
                            # the range only for MinMaxScaler; otherwise the first option is used.
                            has_mean_std = method in ['StandardScaler', 'RobustScaler']
                            mean_options = SCALER_MEAN if has_mean_std else [SCALER_MEAN[0]]
                            std_options = SCALER_STD if has_mean_std else [SCALER_STD[0]]
                            range_options = SCALER_RANGE if method == 'MinMaxScaler' else [SCALER_RANGE[0]]
                            for scaler_opts in itertools.product(mean_options, std_options, range_options):
                                yield (reduce_cat, min_freq, *drop_flags, enabled, method, *scaler_opts)

    records = []
    for run_no, combo in enumerate(iter_combinations(), start=1):
        (
            reduce_cat, min_freq,
            drop_ref, drop_1st, drop_bin,
            enabled, method, use_mean, use_std, value_range,
        ) = combo

        record = {
            "SCALER_ENABLED": enabled,
            "SCALER_METHOD": method,
            "SCALER_MEAN": use_mean,
            "SCALER_STD": use_std,
            "SCALER_RANGE": value_range,
            "REDUCE_CATEGORIES": reduce_cat,
            "MIN_FREQUENCY": min_freq,
            "DROP_REFERENCE_CAT": drop_ref,
            "DROP_FIRST": drop_1st,
            "DROP_BINARY": drop_bin,
        }

        scaling_text = generate_scaling_description(enabled, method, use_mean, use_std, value_range)
        record["RUN_NAME"] = f"run_{run_no:04d}"
        record["DESCRIPTION"] = DESCRIPTION_TEMPLATE.format(
            outliers_policy="n/a",
            reduce_categories=reduce_cat,
            min_frequency=min_freq,
            scaling_desc=scaling_text,
        )
        records.append(record)

    frame = pd.DataFrame(records)

    def as_text(value):
        # Strings pass through unchanged; everything else becomes its str() form.
        return value if isinstance(value, str) else str(value)

    for column in frame.columns:
        frame[column] = frame[column].map(as_text)
    return frame

# Build the run grid; the display table is indexed by run name and leaves out the
# multi-line DESCRIPTION column.
df = generate_runs_config_list_without_outliers()
df_for_table = df.set_index("RUN_NAME").drop(columns=["DESCRIPTION"])

df_for_table

Unnamed: 0_level_0,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,REDUCE_CATEGORIES,MIN_FREQUENCY,DROP_REFERENCE_CAT,DROP_FIRST,DROP_BINARY
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
run_0001,True,StandardScaler,True,False,"[-1, 1]",False,30,False,False,False
run_0002,True,StandardScaler,True,False,"[-1, 1]",False,30,False,False,True
run_0003,True,StandardScaler,True,False,"[-1, 1]",False,30,False,True,False
run_0004,True,StandardScaler,True,False,"[-1, 1]",False,30,False,True,True
run_0005,True,StandardScaler,True,False,"[-1, 1]",False,30,True,False,False
run_0006,True,StandardScaler,True,False,"[-1, 1]",False,30,True,False,True
run_0007,True,StandardScaler,True,False,"[-1, 1]",False,30,True,True,False
run_0008,True,StandardScaler,True,False,"[-1, 1]",False,30,True,True,True
run_0009,True,StandardScaler,True,False,"[-1, 1]",True,10,False,False,False
run_0010,True,StandardScaler,True,False,"[-1, 1]",True,10,False,False,True


In [57]:
experiment_name = "sev_007_feature_engineering"

# Create one experiment run per row of the generated configuration table.
for _, row in df.iterrows():
    run_name = row["RUN_NAME"]

    # Template parameters: every column of the row plus the required identifier fields.
    template_parameters = {
        **row.to_dict(),
        "run_name": run_name,
        "experiment_name": experiment_name,
    }

    create_experiment_run(
        experiment_name=experiment_name,
        run_name=run_name,
        template_parameters=template_parameters,
    )

In [69]:
experiment_name = "sev_007_feature_engineering"

# MLflow run id for each generated run name.
run_ids = {
    name: get_run_mlflow_id(experiment_name, name)
    for name in df["RUN_NAME"]
}

# CV statistics per run on the test set.
# NOTE(review): with raise_on_failure=False a failed load presumably yields None, which
# aggregate_cv_stats_dict reports as finished=False - confirm against the loader.
test_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="test",
        mlflow_run_id=mlflow_id,
        time_limit=1,
        raise_on_failure=False,
    )
    for name, mlflow_id in run_ids.items()
}

# Same statistics for the "sample_train_pure" dataset.
train_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="sample_train_pure",
        mlflow_run_id=mlflow_id,
        time_limit=1,
        raise_on_failure=False,
    )
    for name, mlflow_id in run_ids.items()
}

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [70]:
# Test-set CV summary per run: the finished flag plus the mean/std columns of
# nwMGD, nwMBD and nwNCCGI, ordered by mean nwNCCGI (ascending).
res = (
    aggregate_cv_stats_dict(test_cv_stats_dct)
    .loc[
        :,
        [
            "finished",
            "test_nwMGD_cv_mean",
            "test_nwMGD_cv_std",
            "test_nwMBD_cv_mean",
            "test_nwMBD_cv_std",
            "test_nwNCCGI_cv_mean",
            "test_nwNCCGI_cv_std",
        ],
    ]
    .sort_values("test_nwNCCGI_cv_mean")
)

In [71]:
# Attach each run's configuration tags to its test metrics (index-on-index
# merge) and list the runs with the highest mean nwNCCGI first.
(
    res
    .merge(df_for_table, left_index=True, right_index=True)
    .sort_values("test_nwNCCGI_cv_mean", ascending=False)
)

Unnamed: 0_level_0,finished,test_nwMGD_cv_mean,test_nwMGD_cv_std,test_nwMBD_cv_mean,test_nwMBD_cv_std,test_nwNCCGI_cv_mean,test_nwNCCGI_cv_std,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,REDUCE_CATEGORIES,MIN_FREQUENCY,DROP_REFERENCE_CAT,DROP_FIRST,DROP_BINARY
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
run_0015,True,1.522325,0.032451,0.98178,62.869342,0.091287,0.028837,True,StandardScaler,True,False,"[-1, 1]",True,10,True,True,False
run_0016,True,1.522325,0.032451,0.98178,62.869342,0.091287,0.028837,True,StandardScaler,True,False,"[-1, 1]",True,10,True,True,True
run_0023,True,1.522325,0.032451,0.98178,62.869342,0.091287,0.028837,True,StandardScaler,True,False,"[-1, 1]",True,15,True,True,False
run_0024,True,1.522325,0.032451,0.98178,62.869342,0.091287,0.028837,True,StandardScaler,True,False,"[-1, 1]",True,15,True,True,True
run_0031,True,1.522348,0.032472,1.019177,62.898956,0.091205,0.028717,True,StandardScaler,True,False,"[-1, 1]",True,20,True,True,False
run_0032,True,1.522348,0.032472,1.019177,62.898956,0.091205,0.028717,True,StandardScaler,True,False,"[-1, 1]",True,20,True,True,True
run_0094,True,1.520433,0.021677,-4.704467,49.604069,0.090237,0.034511,True,StandardScaler,True,False,"[-1, 1]",True,200,True,False,True
run_0045,True,1.525332,0.029987,-0.870755,54.541451,0.089464,0.03157,True,StandardScaler,True,False,"[-1, 1]",True,30,True,False,False
run_0029,True,1.526534,0.028613,-0.271059,51.121231,0.089293,0.030538,True,StandardScaler,True,False,"[-1, 1]",True,20,True,False,False
run_0013,True,1.526542,0.028623,-0.289417,51.097466,0.089291,0.030536,True,StandardScaler,True,False,"[-1, 1]",True,10,True,False,False


In [73]:
# Same summary for the "sample_train_pure" dataset, highest mean nwNCCGI first.
(
    aggregate_cv_stats_dict(train_cv_stats_dct)
    .loc[
        :,
        [
            "finished",
            "sample_train_pure_nwMGD_cv_mean",
            "sample_train_pure_nwMGD_cv_std",
            "sample_train_pure_nwMBD_cv_mean",
            "sample_train_pure_nwMBD_cv_std",
            "sample_train_pure_nwNCCGI_cv_mean",
            "sample_train_pure_nwNCCGI_cv_std",
        ],
    ]
    .sort_values("sample_train_pure_nwNCCGI_cv_mean", ascending=False)
)

Unnamed: 0_level_0,finished,sample_train_pure_nwMGD_cv_mean,sample_train_pure_nwMGD_cv_std,sample_train_pure_nwMBD_cv_mean,sample_train_pure_nwMBD_cv_std,sample_train_pure_nwNCCGI_cv_mean,sample_train_pure_nwNCCGI_cv_std
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
run_0001,True,1.386573,0.010022,4.293991,11.91559,0.302469,0.014004
run_0042,True,1.386246,0.010943,4.502591,4.353227,0.302171,0.013563
run_0018,True,1.386336,0.01085,6.422862,7.310206,0.302141,0.013468
run_0010,True,1.386336,0.01085,6.422862,7.310206,0.302141,0.013468
run_0005,True,1.386171,0.010663,5.779484,8.190326,0.30214,0.014254
run_0006,True,1.386009,0.010591,11.127112,3.730604,0.302134,0.013095
run_0026,True,1.386338,0.010852,6.9856,7.140684,0.302084,0.013517
run_0034,True,1.386346,0.010857,6.698939,4.733952,0.302005,0.013444
run_0002,True,1.385853,0.011185,11.17588,4.042341,0.301986,0.014129
run_0046,True,1.386123,0.010198,9.79718,8.008792,0.301402,0.013162


### 008 Feature selection

In [6]:
# Re-import needed modules after kernel reset
import itertools
import pandas as pd

# Sweep switches. A False switch collapses the matching option lists below to a
# single default value, so that dimension is held fixed in the run grid.
gen_scaler, gen_reduce_cat, gen_drop = False, False, False

# Values used for the dimensions that are held fixed.
default_scaler_method = "StandardScaler"
default_scaler_mean = default_scaler_std = True
default_scaler_range = [-1, 1]

default_reduce_categories = True
default_min_frequency = 20
default_drop_reference_cat = default_drop_first = True
default_drop_binary = False

# Scaler options.
if gen_scaler:
    SCALER_ENABLED = [False, True]
    SCALER_METHOD = ["StandardScaler", "MinMaxScaler", "RobustScaler", "MaxAbsScaler"]
    SCALER_MEAN = [False, True]
    SCALER_STD = [False, True]
    SCALER_RANGE = [[-1, 1], [0, 1]]
else:
    SCALER_ENABLED = [True]
    SCALER_METHOD = [default_scaler_method]
    SCALER_MEAN = [default_scaler_mean]
    SCALER_STD = [default_scaler_std]
    SCALER_RANGE = [default_scaler_range]

# Category-reduction options.
if gen_reduce_cat:
    REDUCE_CATEGORIES = [False, True]
    MIN_FREQUENCY_OPTIONS = [10, 15, 20, 25, 30, 40, 50, 75, 100, 150, 200]
else:
    REDUCE_CATEGORIES = [default_reduce_categories]
    MIN_FREQUENCY_OPTIONS = [default_min_frequency]

# DROP_* options.
if gen_drop:
    DROP_REFERENCE_CAT = [False, True]
    DROP_FIRST = [False, True]
    DROP_BINARY = [False, True]
else:
    DROP_REFERENCE_CAT = [default_drop_reference_cat]
    DROP_FIRST = [default_drop_first]
    DROP_BINARY = [default_drop_binary]

# Feature-selection grid swept by this experiment.
# NOTE(review): "" in MAX_FEATURES presumably means "no cap" - confirm against
# the run template that consumes this tag.
FEATURE_SELECTION = [True]
MAX_FEATURES = ["", 10, 15, 20, 30, 40]
MIN_IMPORTANCE = ["0.00001"]
FS_METHOD = ["pyglmnet"]
FS_PYGLMNET_ALPHA = [0, 0.2, 0.5, 0.8, 1]
FS_PYGLMNET_LAMBDA = [0, 0.01, 0.1, 0.5]

# Per-run description text, filled in with str.format.
# NOTE(review): the text still says "all features" and "no regularization"
# although FEATURE_SELECTION is [True] here - confirm whether it should be updated.
DESCRIPTION_TEMPLATE = """|
    Baseline Sklearn Gamma GLM with the log link. It uses Wuthrich feature engineering.
    sample:
      - outliers policy: {outliers_policy}
    feature engineering:
      - Wuthrich features
      - reduce categories: {reduce_categories} (min freq: {min_frequency})
      - one-hot encoding
      - {scaling_desc}
    feature selection:
      - all features
    model:
      - no regularization
    calibration:
      - no calibration
"""

def generate_scaling_description(scaler_enabled, method, mean, std, range_):
    """Return the one-line scaling summary used in the run description.

    Args:
        scaler_enabled: when falsy the result is always "no scaling".
        method: scaler name; it is the leading part of the summary.
        mean, std: reported for 'StandardScaler' and 'RobustScaler' only.
        range_: reported for 'MinMaxScaler' only.

    Returns:
        "no scaling", the bare method name, or "<method> (<details>)".
    """
    if not scaler_enabled:
        return "no scaling"

    # Only some scalers have parameters worth reporting.
    if method in ('StandardScaler', 'RobustScaler'):
        details = [f"mean={mean}", f"std={std}"]
    elif method == 'MinMaxScaler':
        details = [f"range={range_}"]
    else:
        details = []

    label = f"{method}"
    return label + " (" + ", ".join(details) + ")" if details else label

def generate_runs_config_list_with_fs():
    """Enumerate every run configuration of the feature-selection sweep.

    The grid is read from the module-level option lists (REDUCE_CATEGORIES,
    MIN_FREQUENCY_OPTIONS, DROP_*, SCALER_*, FEATURE_SELECTION, FS_METHOD,
    MAX_FEATURES, MIN_IMPORTANCE, FS_PYGLMNET_ALPHA, FS_PYGLMNET_LAMBDA).
    Runs are numbered from 1 in enumeration order and named ``run_NNNN``.

    FEATURE_SELECTION and FS_METHOD are enumerated like the other option
    lists, so editing them takes effect. They are the slowest-varying factors
    of the innermost product, which leaves the numbering of single-valued
    lists unchanged.

    Returns:
        pandas.DataFrame: one row per run holding the tag columns plus
        ``RUN_NAME`` and ``DESCRIPTION``; every cell is converted to ``str``.
    """
    tag_dicts = []
    run_id = 0

    for reduce_categories in REDUCE_CATEGORIES:
        # The frequency threshold is only swept when category reduction is on.
        min_frequencies = MIN_FREQUENCY_OPTIONS if reduce_categories else [default_min_frequency]
        for min_frequency in min_frequencies:
            for drop_reference_cat, drop_first, drop_binary in itertools.product(
                DROP_REFERENCE_CAT, DROP_FIRST, DROP_BINARY
            ):
                for scaler_enabled in SCALER_ENABLED:
                    # A disabled scaler falls back to the first method.
                    # NOTE(review): means/stds below are still chosen from that
                    # method, so a disabled scaler can be enumerated more than
                    # once when the scaler sweep is switched on.
                    scaler_methods = SCALER_METHOD if scaler_enabled else [SCALER_METHOD[0]]
                    for scaler_method in scaler_methods:
                        uses_mean_std = scaler_method in ['StandardScaler', 'RobustScaler']
                        means = SCALER_MEAN if uses_mean_std else [SCALER_MEAN[0]]
                        stds = SCALER_STD if uses_mean_std else [SCALER_STD[0]]
                        ranges = SCALER_RANGE if scaler_method == 'MinMaxScaler' else [SCALER_RANGE[0]]

                        for mean, std, scaler_range in itertools.product(means, stds, ranges):
                            for (
                                feature_selection,
                                fs_method,
                                max_features,
                                min_importance,
                                alpha,
                                lambd,
                            ) in itertools.product(
                                FEATURE_SELECTION,
                                FS_METHOD,
                                MAX_FEATURES,
                                MIN_IMPORTANCE,
                                FS_PYGLMNET_ALPHA,
                                FS_PYGLMNET_LAMBDA,
                            ):
                                run_id += 1
                                run_id_str = f"{run_id:04d}"

                                tag_dict = {
                                    "SCALER_ENABLED": scaler_enabled,
                                    "SCALER_METHOD": scaler_method,
                                    "SCALER_MEAN": mean,
                                    "SCALER_STD": std,
                                    "SCALER_RANGE": scaler_range,
                                    "REDUCE_CATEGORIES": reduce_categories,
                                    "MIN_FREQUENCY": min_frequency,
                                    "DROP_REFERENCE_CAT": drop_reference_cat,
                                    "DROP_FIRST": drop_first,
                                    "DROP_BINARY": drop_binary,
                                    # Taken from the option lists rather than hard-coded.
                                    "FEATURE_SELECTION": feature_selection,
                                    "MAX_FEATURES": max_features,
                                    "MIN_IMPORTANCE": min_importance,
                                    "FS_METHOD": fs_method,
                                    "FS_PYGLMNET_ALPHA": alpha,
                                    "FS_PYGLMNET_LAMBDA": lambd,
                                }

                                scaling_desc = generate_scaling_description(
                                    scaler_enabled, scaler_method, mean, std, scaler_range
                                )
                                description = DESCRIPTION_TEMPLATE.format(
                                    outliers_policy="n/a",
                                    reduce_categories=reduce_categories,
                                    min_frequency=min_frequency,
                                    scaling_desc=scaling_desc,
                                )

                                run_name = f"run_{run_id_str}"
                                tag_dict["RUN_NAME"] = run_name
                                tag_dict["DESCRIPTION"] = description
                                tag_dicts.append(tag_dict)

    df = pd.DataFrame(tag_dicts)

    def convert_value(x):
        # Every value becomes text; existing strings are passed through untouched.
        return str(x) if not isinstance(x, str) else x

    for col in df.columns:
        df[col] = df[col].map(convert_value)
    return df

# Materialise the run grid; the display table is indexed by run name and
# leaves out the long DESCRIPTION column.
df = generate_runs_config_list_with_fs()
df_for_table = df.set_index("RUN_NAME").drop(columns=["DESCRIPTION"])

df_for_table

Unnamed: 0_level_0,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,REDUCE_CATEGORIES,MIN_FREQUENCY,DROP_REFERENCE_CAT,DROP_FIRST,DROP_BINARY,FEATURE_SELECTION,MAX_FEATURES,MIN_IMPORTANCE,FS_METHOD,FS_PYGLMNET_ALPHA,FS_PYGLMNET_LAMBDA
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
run_0001,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.0,0.0
run_0002,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.0,0.01
run_0003,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.0,0.1
run_0004,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.0,0.5
run_0005,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.2,0.0
run_0006,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.2,0.01
run_0007,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.2,0.1
run_0008,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.2,0.5
run_0009,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.5,0.0
run_0010,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0.5,0.01


In [83]:
experiment_name = "sev_008_feature_selection"

# Create one experiment run per row of the run grid.
for _, row in df.iterrows():
    run_name = row["RUN_NAME"]

    # Template parameters: every column of the row plus the required
    # run_name / experiment_name fields.
    template_parameters = {
        **row.to_dict(),
        "run_name": run_name,
        "experiment_name": experiment_name,
    }

    create_experiment_run(
        experiment_name=experiment_name,
        run_name=run_name,
        template_parameters=template_parameters,
    )

In [7]:
experiment_name = "sev_008_feature_selection"

# MLflow run id for every run name in the grid.
run_ids = {
    name: get_run_mlflow_id(experiment_name, name)
    for name in df["RUN_NAME"]
}

# CV statistics per run for the "test" dataset; failures do not raise
# (raise_on_failure=False) and each load is given time_limit=1.
test_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="test",
        mlflow_run_id=mlflow_id,
        time_limit=1,
        raise_on_failure=False,
    )
    for name, mlflow_id in run_ids.items()
}

# Same for the "sample_train_pure" dataset.
train_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="sample_train_pure",
        mlflow_run_id=mlflow_id,
        time_limit=1,
        raise_on_failure=False,
    )
    for name, mlflow_id in run_ids.items()
}

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [12]:
# Test-set CV summary per run: the finished flag plus the mean/std columns of
# nwMGD, nwMBD and nwNCCGI, ordered by mean nwNCCGI (ascending).
res = (
    aggregate_cv_stats_dict(test_cv_stats_dct)
    .loc[
        :,
        [
            "finished",
            "test_nwMGD_cv_mean",
            "test_nwMGD_cv_std",
            "test_nwMBD_cv_mean",
            "test_nwMBD_cv_std",
            "test_nwNCCGI_cv_mean",
            "test_nwNCCGI_cv_std",
        ],
    ]
    .sort_values("test_nwNCCGI_cv_mean")
)

In [13]:
# Attach each run's configuration tags to its test metrics (index-on-index
# merge) and list the runs with the highest mean nwNCCGI first.
(
    res
    .merge(df_for_table, left_index=True, right_index=True)
    .sort_values("test_nwNCCGI_cv_mean", ascending=False)
)

Unnamed: 0_level_0,finished,test_nwMGD_cv_mean,test_nwMGD_cv_std,test_nwMBD_cv_mean,test_nwMBD_cv_std,test_nwNCCGI_cv_mean,test_nwNCCGI_cv_std,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,REDUCE_CATEGORIES,MIN_FREQUENCY,DROP_REFERENCE_CAT,DROP_FIRST,DROP_BINARY,FEATURE_SELECTION,MAX_FEATURES,MIN_IMPORTANCE,FS_METHOD,FS_PYGLMNET_ALPHA,FS_PYGLMNET_LAMBDA
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
run_0109,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,0.5,0.0
run_0106,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,0.2,0.01
run_0105,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,0.2,0.0
run_0118,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,1.0,0.01
run_0117,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,1.0,0.0
run_0101,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,0.0,0.0
run_0114,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,0.8,0.01
run_0113,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,0.8,0.0
run_0102,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,0.0,0.01
run_0110,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,0.5,0.01


In [16]:
# Same summary for the "sample_train_pure" dataset, ordered by mean nwNCCGI
# ascending (lowest first).
(
    aggregate_cv_stats_dict(train_cv_stats_dct)
    .loc[
        :,
        [
            "finished",
            "sample_train_pure_nwMGD_cv_mean",
            "sample_train_pure_nwMGD_cv_std",
            "sample_train_pure_nwMBD_cv_mean",
            "sample_train_pure_nwMBD_cv_std",
            "sample_train_pure_nwNCCGI_cv_mean",
            "sample_train_pure_nwNCCGI_cv_std",
        ],
    ]
    .sort_values("sample_train_pure_nwNCCGI_cv_mean")
)

Unnamed: 0_level_0,finished,sample_train_pure_nwMGD_cv_mean,sample_train_pure_nwMGD_cv_std,sample_train_pure_nwMBD_cv_mean,sample_train_pure_nwMBD_cv_std,sample_train_pure_nwNCCGI_cv_mean,sample_train_pure_nwNCCGI_cv_std
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
run_0028,True,1.490532,0.008996,0.189971,0.221198,0.053431,0.030352
run_0068,True,1.490532,0.008996,0.189971,0.221198,0.053431,0.030352
run_0048,True,1.490532,0.008996,0.189971,0.221198,0.053431,0.030352
run_0008,True,1.490532,0.008996,0.189971,0.221198,0.053431,0.030352
run_0088,True,1.490532,0.008996,0.189971,0.221198,0.053431,0.030352
run_0108,True,1.490532,0.008996,0.189971,0.221198,0.053431,0.030352
run_0095,True,1.487615,0.010688,0.201102,0.276154,0.067589,0.036271
run_0055,True,1.487615,0.010688,0.201102,0.276154,0.067589,0.036271
run_0075,True,1.487615,0.010688,0.201102,0.276154,0.067589,0.036271
run_0015,True,1.487615,0.010688,0.201102,0.276154,0.067589,0.036271


### 009 Feature selection

In [20]:
# Imports are repeated here so this cell still runs after a kernel reset
import itertools
import pandas as pd

# Grid switches: True expands the matching preprocessing axis into all of its
# options, False pins that axis to the single default value declared below.
gen_scaler = False
gen_reduce_cat = False
gen_drop = False

# Defaults used whenever an axis is pinned
default_scaler_method = 'StandardScaler'
default_scaler_mean = True
default_scaler_std = True
default_scaler_range = [-1, 1]

default_reduce_categories = True
default_min_frequency = 20
default_drop_reference_cat = True
default_drop_first = True
default_drop_binary = False

# Scaler axis
if gen_scaler:
    SCALER_ENABLED = [False, True]
    SCALER_METHOD = ['StandardScaler', 'MinMaxScaler', 'RobustScaler', 'MaxAbsScaler']
    SCALER_MEAN = [False, True]
    SCALER_STD = [False, True]
    SCALER_RANGE = [[-1, 1], [0, 1]]
else:
    SCALER_ENABLED = [True]
    SCALER_METHOD = [default_scaler_method]
    SCALER_MEAN = [default_scaler_mean]
    SCALER_STD = [default_scaler_std]
    SCALER_RANGE = [default_scaler_range]

# Category-reduction axis
if gen_reduce_cat:
    REDUCE_CATEGORIES = [False, True]
    MIN_FREQUENCY_OPTIONS = [10, 15, 20, 25, 30, 40, 50, 75, 100, 150, 200]
else:
    REDUCE_CATEGORIES = [default_reduce_categories]
    MIN_FREQUENCY_OPTIONS = [default_min_frequency]

# One-hot "drop" axes
if gen_drop:
    DROP_REFERENCE_CAT = [False, True]
    DROP_FIRST = [False, True]
    DROP_BINARY = [False, True]
else:
    DROP_REFERENCE_CAT = [default_drop_reference_cat]
    DROP_FIRST = [default_drop_first]
    DROP_BINARY = [default_drop_binary]

# Feature-selection axes; the empty string in MAX_FEATURES is the first entry,
# followed by the explicit limits 31..51.
FEATURE_SELECTION = [True]
MAX_FEATURES = ["", *range(31, 52)]
MIN_IMPORTANCE = ["0.00001"]
FS_METHOD = ["pyglmnet"]
FS_PYGLMNET_ALPHA = [0]
FS_PYGLMNET_LAMBDA = [0]

# MLflow run id emitted as the DE_MLFLOW_RUN_ID tag of every generated run.
# NOTE(review): the variable name is spelled "mlfow"; it is referenced under this
# exact name by generate_runs_config_list_with_fs, so it is kept as is.
de_mlfow_run_id = "98f203c794384eeb90080a7f05ae45a1"

# Free-text description attached to every run (placeholders are filled per run).
# NOTE(review): the text still says "all features" and "no regularization" although
# this section configures feature selection - confirm the wording is intended.
DESCRIPTION_TEMPLATE = """|
    Baseline Sklearn Gamma GLM with the log link. It uses Wuthrich feature engineering.
    sample:
      - outliers policy: {outliers_policy}
    feature engineering:
      - Wuthrich features
      - reduce categories: {reduce_categories} (min freq: {min_frequency})
      - one-hot encoding
      - {scaling_desc}
    feature selection:
      - all features
    model:
      - no regularization
    calibration:
      - no calibration
"""

def generate_scaling_description(scaler_enabled, method, mean, std, range_):
    """Return a short human-readable label for a scaler configuration.

    Args:
        scaler_enabled: when falsy, the result is always ``"no scaling"``.
        method: scaler name; it is rendered verbatim at the start of the label.
        mean, std: shown only for ``StandardScaler`` and ``RobustScaler``.
        range_: shown only for ``MinMaxScaler``.

    Returns:
        str: ``"no scaling"``, the bare method name, or the method name followed
        by its relevant settings in parentheses.
    """
    if not scaler_enabled:
        return "no scaling"

    # Only the settings that are meaningful for the given method are listed.
    if method in ('StandardScaler', 'RobustScaler'):
        details = [f"mean={mean}", f"std={std}"]
    elif method == 'MinMaxScaler':
        details = [f"range={range_}"]
    else:
        details = []

    label = f"{method}"
    if not details:
        return label
    return label + " (" + ", ".join(details) + ")"

def generate_runs_config_list_with_fs():
    """Enumerate the run configurations of the feature-selection experiment.

    Walks the module-level option lists in a fixed nesting order (category
    reduction -> drop flags -> scaler -> feature selection) and numbers the
    resulting combinations sequentially as ``run_0001``, ``run_0002``, ...

    ``FEATURE_SELECTION`` and ``FS_METHOD`` are read from their option lists
    (not hard-coded), so editing those lists changes the generated tags.

    Returns:
        pandas.DataFrame: one row per run holding the tag columns plus
        ``RUN_NAME`` and ``DESCRIPTION``; every cell is converted to ``str``.
    """
    tag_dicts = []
    run_id = 0
    mean_std_scalers = ['StandardScaler', 'RobustScaler']

    for reduce_categories in REDUCE_CATEGORIES:
        # The minimum frequency only matters when categories are reduced.
        min_frequencies = MIN_FREQUENCY_OPTIONS if reduce_categories else [default_min_frequency]
        for min_frequency in min_frequencies:
            for drop_reference_cat, drop_first, drop_binary in itertools.product(
                DROP_REFERENCE_CAT, DROP_FIRST, DROP_BINARY
            ):
                for scaler_enabled in SCALER_ENABLED:
                    # A disabled scaler still needs a placeholder method for the tags.
                    # NOTE(review): with the scaler disabled, mean/std/range options of
                    # the placeholder method are still enumerated - confirm that the
                    # resulting duplicate configurations are intended.
                    scaler_methods = SCALER_METHOD if scaler_enabled else [SCALER_METHOD[0]]
                    for scaler_method in scaler_methods:
                        uses_mean_std = scaler_method in mean_std_scalers
                        means = SCALER_MEAN if uses_mean_std else [SCALER_MEAN[0]]
                        stds = SCALER_STD if uses_mean_std else [SCALER_STD[0]]
                        ranges = SCALER_RANGE if scaler_method == 'MinMaxScaler' else [SCALER_RANGE[0]]

                        for mean, std, scaler_range in itertools.product(means, stds, ranges):
                            # The description does not depend on the feature-selection
                            # options, so it is built once per preprocessing combination.
                            scaling_desc = generate_scaling_description(
                                scaler_enabled, scaler_method, mean, std, scaler_range
                            )
                            description = DESCRIPTION_TEMPLATE.format(
                                outliers_policy="n/a",
                                reduce_categories=reduce_categories,
                                min_frequency=min_frequency,
                                scaling_desc=scaling_desc,
                            )

                            for (
                                feature_selection, max_features, min_importance,
                                fs_method, alpha, lambd,
                            ) in itertools.product(
                                FEATURE_SELECTION, MAX_FEATURES, MIN_IMPORTANCE,
                                FS_METHOD, FS_PYGLMNET_ALPHA, FS_PYGLMNET_LAMBDA,
                            ):
                                run_id += 1

                                tag_dicts.append({
                                    "SCALER_ENABLED": scaler_enabled,
                                    "SCALER_METHOD": scaler_method,
                                    "SCALER_MEAN": mean,
                                    "SCALER_STD": std,
                                    "SCALER_RANGE": scaler_range,
                                    "REDUCE_CATEGORIES": reduce_categories,
                                    "MIN_FREQUENCY": min_frequency,
                                    "DROP_REFERENCE_CAT": drop_reference_cat,
                                    "DROP_FIRST": drop_first,
                                    "DROP_BINARY": drop_binary,
                                    "FEATURE_SELECTION": feature_selection,
                                    "MAX_FEATURES": max_features,
                                    "MIN_IMPORTANCE": min_importance,
                                    "FS_METHOD": fs_method,
                                    "FS_PYGLMNET_ALPHA": alpha,
                                    "FS_PYGLMNET_LAMBDA": lambd,
                                    "DE_MLFLOW_RUN_ID": de_mlfow_run_id,
                                    "RUN_NAME": f"run_{run_id:04d}",
                                    "DESCRIPTION": description,
                                })

    df = pd.DataFrame(tag_dicts)

    def convert_value(x):
        # Strings pass through untouched; everything else is stringified.
        return str(x) if not isinstance(x, str) else x

    for col in df.columns:
        df[col] = df[col].map(convert_value)
    return df

# Build the full run grid; the display table is indexed by run name and leaves
# out the multi-line DESCRIPTION column.
df = generate_runs_config_list_with_fs()
df_for_table = df.set_index("RUN_NAME").drop(columns=["DESCRIPTION"])

df_for_table

Unnamed: 0_level_0,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,REDUCE_CATEGORIES,MIN_FREQUENCY,DROP_REFERENCE_CAT,DROP_FIRST,DROP_BINARY,FEATURE_SELECTION,MAX_FEATURES,MIN_IMPORTANCE,FS_METHOD,FS_PYGLMNET_ALPHA,FS_PYGLMNET_LAMBDA,DE_MLFLOW_RUN_ID
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
run_0001,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0002,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,31.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0003,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,32.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0004,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,33.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0005,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,34.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0006,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,35.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0007,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,36.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0008,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,37.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0009,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,38.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0010,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,39.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1


In [21]:
experiment_name = "sev_009_feature_selection"

# Call create_experiment_run once per configuration row
for _, row in df.iterrows():
    run_name = row["RUN_NAME"]

    # Template parameters: every column of the row plus the two required fields
    template_parameters = {
        **row.to_dict(),
        "run_name": run_name,
        "experiment_name": experiment_name,
    }

    create_experiment_run(
        experiment_name=experiment_name,
        run_name=run_name,
        template_parameters=template_parameters,
    )

In [22]:
experiment_name = "sev_009_feature_selection"

# MLflow run id for every generated run name
run_ids = {
    name: get_run_mlflow_id(experiment_name, name)
    for name in df.RUN_NAME
}

# CV statistics per run, first for the test set, then for the pure train sample.
# NOTE(review): with raise_on_failure=False the loader presumably yields None for
# runs without stats (aggregate_cv_stats_dict marks None as unfinished) - confirm.
test_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="test", mlflow_run_id=mlflow_id, time_limit=1, raise_on_failure=False
    )
    for name, mlflow_id in run_ids.items()
}
train_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="sample_train_pure", mlflow_run_id=mlflow_id, time_limit=1, raise_on_failure=False
    )
    for name, mlflow_id in run_ids.items()
}

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [23]:
# Test-set CV metrics per run, ordered by ascending mean nwNCCGI
res = (
    aggregate_cv_stats_dict(test_cv_stats_dct)
    .loc[:, [
        "finished",
        "test_nwMGD_cv_mean",
        "test_nwMGD_cv_std",
        "test_nwMBD_cv_mean",
        "test_nwMBD_cv_std",
        "test_nwNCCGI_cv_mean",
        "test_nwNCCGI_cv_std",
    ]]
    .sort_values("test_nwNCCGI_cv_mean")
)

In [24]:
# Attach the run configuration to the metrics (index-on-index join on RUN_NAME)
# and show the runs with the highest mean nwNCCGI first.
pd.merge(
    res,
    df_for_table,
    left_index=True,
    right_index=True,
).sort_values("test_nwNCCGI_cv_mean", ascending=False)

Unnamed: 0_level_0,finished,test_nwMGD_cv_mean,test_nwMGD_cv_std,test_nwMBD_cv_mean,test_nwMBD_cv_std,test_nwNCCGI_cv_mean,test_nwNCCGI_cv_std,SCALER_ENABLED,SCALER_METHOD,SCALER_MEAN,SCALER_STD,SCALER_RANGE,REDUCE_CATEGORIES,MIN_FREQUENCY,DROP_REFERENCE_CAT,DROP_FIRST,DROP_BINARY,FEATURE_SELECTION,MAX_FEATURES,MIN_IMPORTANCE,FS_METHOD,FS_PYGLMNET_ALPHA,FS_PYGLMNET_LAMBDA,DE_MLFLOW_RUN_ID
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
run_0010,True,1.511055,0.026831,1.686281,49.009945,0.102281,0.023195,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,39.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0011,True,1.51257,0.026689,2.276714,47.9547,0.100539,0.024315,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,40.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0009,True,1.511993,0.030871,2.173013,47.336923,0.099815,0.01923,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,38.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0008,True,1.512317,0.031201,1.230485,47.55736,0.0983,0.015288,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,37.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0021,True,1.523063,0.027371,0.250684,52.617232,0.095279,0.03264,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,50.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0020,True,1.523225,0.02804,-0.070277,52.911816,0.094651,0.031166,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,49.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0012,True,1.518415,0.024544,1.174636,53.407557,0.0939,0.026912,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,41.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0005,True,1.512493,0.030571,-0.071472,46.36355,0.092725,0.014169,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,34.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0019,True,1.524396,0.028722,-0.205213,52.948947,0.092047,0.03039,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,48.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1
run_0007,True,1.514727,0.029228,1.811702,47.6511,0.091929,0.018894,True,StandardScaler,True,True,"[-1, 1]",True,20,True,True,False,True,36.0,1e-05,pyglmnet,0,0,98f203c794384eeb90080a7f05ae45a1


In [25]:
# Train-sample CV metrics per run, ordered by ascending mean nwNCCGI
(
    aggregate_cv_stats_dict(train_cv_stats_dct)
    .loc[:, [
        "finished",
        "sample_train_pure_nwMGD_cv_mean",
        "sample_train_pure_nwMGD_cv_std",
        "sample_train_pure_nwMBD_cv_mean",
        "sample_train_pure_nwMBD_cv_std",
        "sample_train_pure_nwNCCGI_cv_mean",
        "sample_train_pure_nwNCCGI_cv_std",
    ]]
    .sort_values("sample_train_pure_nwNCCGI_cv_mean")
)

Unnamed: 0_level_0,finished,sample_train_pure_nwMGD_cv_mean,sample_train_pure_nwMGD_cv_std,sample_train_pure_nwMBD_cv_mean,sample_train_pure_nwMBD_cv_std,sample_train_pure_nwNCCGI_cv_mean,sample_train_pure_nwNCCGI_cv_std
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
run_0002,True,1.411761,0.010314,3.099403,3.902746,0.27105,0.018637
run_0003,True,1.409111,0.007514,3.986771,3.595213,0.274034,0.012157
run_0004,True,1.408303,0.007515,4.711015,3.673752,0.274638,0.013208
run_0005,True,1.407037,0.007242,5.192289,3.248119,0.277795,0.013036
run_0006,True,1.406295,0.00704,5.453645,2.919042,0.278604,0.012828
run_0007,True,1.405912,0.007384,5.608813,3.164146,0.279445,0.013
run_0008,True,1.404847,0.006961,6.210293,2.608195,0.280488,0.012575
run_0009,True,1.404236,0.006343,6.653447,2.671515,0.281111,0.012891
run_0010,True,1.403185,0.005828,6.242315,2.878867,0.282615,0.013381
run_0011,True,1.402635,0.005775,5.97299,3.022047,0.283835,0.013635


### 010 ds Hyperopt + no calibration

In [40]:
# Imports are repeated here so this cell still runs after a kernel reset
import itertools
import pandas as pd

# Models to tune and the hyperopt evaluation budget for each of them
DS_MODELS = ["SklearnGammaGLM", "PyGLMNetGammaGLM"]
HP_MAX_EVALS = {
    "SklearnGammaGLM": 200,
    "PyGLMNetGammaGLM": 200
}


def convert_value(x):
    """Return ``x`` unchanged when it is already a string, otherwise ``str(x)``."""
    return x if isinstance(x, str) else str(x)


# One tag dictionary per model; runs are numbered from 1 in DS_MODELS order
tag_dicts = [
    {
        "DS_MODEL": ds_model,
        "HP_MAX_EVALS": HP_MAX_EVALS[ds_model],
        "RUN_NAME": f"run_{run_id:04d}",
    }
    for run_id, ds_model in enumerate(DS_MODELS, start=1)
]

# Every tag value is stored as a string
df = pd.DataFrame(tag_dicts).apply(lambda column: column.map(convert_value))

df_for_table = df.set_index("RUN_NAME")

df_for_table

Unnamed: 0_level_0,DS_MODEL,HP_MAX_EVALS
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1
run_0001,SklearnGammaGLM,200
run_0002,PyGLMNetGammaGLM,200


In [43]:
experiment_name = "sev_010_glm_hyperopt"

# Call create_experiment_run once per configuration row
for _, row in df.iterrows():
    run_name = row["RUN_NAME"]

    # Template parameters: every column of the row plus the two required fields
    template_parameters = {
        **row.to_dict(),
        "run_name": run_name,
        "experiment_name": experiment_name,
    }

    create_experiment_run(
        experiment_name=experiment_name,
        run_name=run_name,
        template_parameters=template_parameters,
    )

In [45]:
experiment_name = "sev_010_glm_hyperopt"

# MLflow run id for every generated run name
run_ids = {
    name: get_run_mlflow_id(experiment_name, name)
    for name in df.RUN_NAME
}

# CV statistics per run, first for the test set, then for the pure train sample.
# NOTE(review): with raise_on_failure=False the loader presumably yields None for
# runs without stats (aggregate_cv_stats_dict marks None as unfinished) - confirm.
test_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="test", mlflow_run_id=mlflow_id, time_limit=1, raise_on_failure=False
    )
    for name, mlflow_id in run_ids.items()
}
train_cv_stats_dct = {
    name: load_metrics_cv_stats_from_mlflow(
        dataset="sample_train_pure", mlflow_run_id=mlflow_id, time_limit=1, raise_on_failure=False
    )
    for name, mlflow_id in run_ids.items()
}

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [46]:
# Test-set CV metrics per run, ordered by ascending mean nwNCCGI
res = (
    aggregate_cv_stats_dict(test_cv_stats_dct)
    .loc[:, [
        "finished",
        "test_nwMGD_cv_mean",
        "test_nwMGD_cv_std",
        "test_nwMBD_cv_mean",
        "test_nwMBD_cv_std",
        "test_nwNCCGI_cv_mean",
        "test_nwNCCGI_cv_std",
    ]]
    .sort_values("test_nwNCCGI_cv_mean")
)

In [47]:
# Attach the run configuration to the metrics (index-on-index join on RUN_NAME)
# and show the runs with the highest mean nwNCCGI first.
pd.merge(
    res,
    df_for_table,
    left_index=True,
    right_index=True,
).sort_values("test_nwNCCGI_cv_mean", ascending=False)

Unnamed: 0_level_0,finished,test_nwMGD_cv_mean,test_nwMGD_cv_std,test_nwMBD_cv_mean,test_nwMBD_cv_std,test_nwNCCGI_cv_mean,test_nwNCCGI_cv_std,DS_MODEL,HP_MAX_EVALS
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
run_0001,True,1.506416,0.036755,19.814084,49.158073,0.080327,0.025304,SklearnGammaGLM,200
run_0002,False,,,,,,,PyGLMNetGammaGLM,200


In [48]:
# Train-sample CV metrics per run, ordered by ascending mean nwNCCGI
(
    aggregate_cv_stats_dict(train_cv_stats_dct)
    .loc[:, [
        "finished",
        "sample_train_pure_nwMGD_cv_mean",
        "sample_train_pure_nwMGD_cv_std",
        "sample_train_pure_nwMBD_cv_mean",
        "sample_train_pure_nwMBD_cv_std",
        "sample_train_pure_nwNCCGI_cv_mean",
        "sample_train_pure_nwNCCGI_cv_std",
    ]]
    .sort_values("sample_train_pure_nwNCCGI_cv_mean")
)

Unnamed: 0_level_0,finished,sample_train_pure_nwMGD_cv_mean,sample_train_pure_nwMGD_cv_std,sample_train_pure_nwMBD_cv_mean,sample_train_pure_nwMBD_cv_std,sample_train_pure_nwNCCGI_cv_mean,sample_train_pure_nwNCCGI_cv_std
RUN_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
run_0001,True,1.414872,0.016599,25.40815,3.223555,0.270327,0.025259
run_0002,False,,,,,,
