In [103]:
import yaml
import importlib
from typing import Any

In [104]:
def load_config(path: str) -> dict[str, Any]:
    with open(path, "r") as f:
        return yaml.safe_load(f)

In [105]:
def import_class(class_path: str):
    module_name, class_name = class_path.rsplit(".", 1)
    module = importlib.import_module(module_name)
    return getattr(module, class_name)

In [131]:
def instantiate_component(component_config: dict[str, Any], depth=0) -> Any:
    cls = import_class(component_config["class"])
    name = component_config.get("name", "")
    params = component_config.get("params", {})

    for key, value in params.items():
        print(f"{name=}, {key=}, {cls}, {type(value)}")
        try:
            if isinstance(value, list):
                for item in value:
                    instantiate_component(item)
            else:
                raise NotImplementedError()
        except Exception as e:
            print(e)


In [132]:
config = load_config('config_test.yaml')

pipeline_config = config.get("pipeline")
search_params_config = config.get("search_parameters", [])

pipeline = instantiate_component(pipeline_config)

pipeline

name='', key='steps', <class 'sklearn.pipeline.Pipeline'>, <class 'list'>
name='data_pipe', key='steps', <class 'sklearn.pipeline.Pipeline'>, <class 'list'>
name='preprocessor', key='transformers', <class 'sklearn.compose._column_transformer.ColumnTransformer'>, <class 'list'>
name='num', key='imputer', <class 'sklearn.pipeline.Pipeline'>, <class 'dict'>

name='num', key='scaler', <class 'sklearn.pipeline.Pipeline'>, <class 'dict'>

name='cat', key='steps', <class 'sklearn.pipeline.Pipeline'>, <class 'list'>
name='imputer', key='strategy', <class 'sklearn.impute._base.SimpleImputer'>, <class 'str'>

name='feature_selector', key='k', <class 'sklearn.feature_selection._univariate_selection.SelectKBest'>, <class 'str'>

name='classifier', key='C', <class 'sklearn.linear_model._logistic.LogisticRegression'>, <class 'float'>

name='classifier', key='solver', <class 'sklearn.linear_model._logistic.LogisticRegression'>, <class 'str'>

name='classifier', key='max_iter', <class 'sklearn.linear_

---

In [27]:
def import_class(class_path: str):
    module_name, class_name = class_path.rsplit(".", 1)
    module = importlib.import_module(module_name)
    return getattr(module, class_name)

In [28]:
def instantiate_component(component_config: dict[str, Any]) -> Any:
    cls = import_class(component_config["class"])
    params = component_config.get("params", {})

    for key, val in params.items():
        if isinstance(val, dict) and "class" in val:
            params[key] = instantiate_component(val)
        elif isinstance(val, list):
            new_list = []
            for item in val:
                if isinstance(item, dict) and "class" in item:
                    new_list.append(instantiate_component(item))
                else:
                    new_list.append(item)
            params[key] = new_list

    return cls(**params)

In [29]:
def load_search_params(search_params_config):
    search_params = []
    for param_dict in search_params_config:
        new_param_dict = {}
        for key, val in param_dict.items():
            if isinstance(val, list):
                new_list = []
                for item in val:
                    if isinstance(item, dict) and "class" in item:
                        new_list.append(instantiate_component(item))
                    else:
                        new_list.append(item)
                new_param_dict[key] = new_list
            else:
                new_param_dict[key] = val
        search_params.append(new_param_dict)
    return search_params

In [30]:
def load_config(path: str) -> dict[str, Any]:
    with open(path, "r") as f:
        return yaml.safe_load(f)

In [31]:
def load_pipeline_and_search_params(config_path: str):
    config = load_config(config_path)

    pipeline_config = config.get("pipeline")
    search_params_config = config.get("search_parameters", [])

    pipeline = instantiate_component(pipeline_config)
    search_params = load_search_params(search_params_config)

    return pipeline, search_params

In [44]:
pipeline, search_params = load_pipeline_and_search_params("config_test.yaml")
print("Pipeline:")
print(pipeline)
print("\nSearch parameters:")
for p in search_params:
    print(p)

Pipeline:
Pipeline(steps=[LogisticRegression(), LogisticRegression()])

Search parameters:
{'data_pipe__preprocessor__num__imputer__strategy': ['mean', 'median']}
