Meta-Select Basic Usage Example

This notebook shows a simple example of how to use the Meta-Select package to:
1. Load features and meta-targets
2. Build a k-fold split (with inner folds) of the data
3. Run a single meta-feature selector on each fold
4. Train a single meta-model using the selected features

In [14]:
import random
import numpy as np
import pandas as pd

from ms.config.experiment_config import ExperimentConfig
from ms.selection.selector import Selector
from ms.metalearning.meta_model import MetaModel
from ms.pipeline.runner import run_selector
from ms.processing.split import split_k_fold
from ms.utils.navigation import pjoin, get_file_name

In [15]:
# Reproducibility: seed NumPy's global RNG and the standard-library `random`
# module (in that order) with the shared seed from ExperimentConfig.
for seed_rng in (np.random.seed, random.seed):
    seed_rng(ExperimentConfig.SEED)

In [16]:
# Which dataset collection to read, and which file variant (suffix) of the
# feature and metric tables to use.
source = "tabzilla"
feature_suffix = None
metrics_suffix = "raw"  # regression task

# Both tables live under the configured resources directory.
resources_root = ExperimentConfig.CONF.resources_path

# File stems are derived from a prefix plus the optional suffix chosen above.
features_stem = get_file_name(prefix='features', suffix=feature_suffix)
metrics_stem = get_file_name(prefix='metrics', suffix=metrics_suffix)

# <resources>/<source>/filtered/<features file>.csv
features_path = pjoin(resources_root, source, "filtered", f"{features_stem}.csv")
# <resources>/<source>/target/<metrics file>.csv
metrics_path = pjoin(resources_root, source, "target", f"{metrics_stem}.csv")

# Load the feature table and the metric (meta-target) table from the two CSVs.
features, metrics = ExperimentConfig.get_data(
    features_path=features_path, metrics_path=metrics_path
)

In [17]:
# Take the first column of the metrics table as the meta-target.
target_name = metrics.columns[0]
target_column = metrics.loc[:, target_name]
# Downstream calls receive the target as a one-column frame rather than a
# Series, so convert it back.
target = target_column.to_frame()

In [19]:
# Meta-feature selector used in this example, taken from the pre-configured
# objects on ExperimentConfig.
# NOTE(review): `CORR` is presumably a correlation-based selector - confirm.
selector: Selector = ExperimentConfig.CORR

# Meta-model trained on the selected features.
# NOTE(review): `KNN_REG` is presumably a k-nearest-neighbours regressor,
# matching the regression target chosen above - confirm.
model: MetaModel = ExperimentConfig.KNN_REG

In [20]:
# Fold counts come from the shared experiment configuration.
n_outer_folds = ExperimentConfig.OUTER_K
n_inner_folds = ExperimentConfig.INNER_K

# Shuffled k-fold split of the features/target with outer and inner folds,
# seeded with the same value used for the global RNGs so it is repeatable.
split = split_k_fold(
    x_df=features,
    y_df=target,
    outer_k=n_outer_folds,
    inner_k=n_inner_folds,
    shuffle=True,
    seed=ExperimentConfig.SEED,
)

In [21]:
# Options that stay fixed for this example.
selection_options = {
    "preprocessor": ExperimentConfig.PREPROCESSOR,
    "k_best": None,  # NOTE(review): presumably "no cap on feature count" - confirm
    "save_path": None,  # Not saving in this example
}

# Run the chosen selector over the split; the result holds the selected
# feature names for each fold.
selector_results = run_selector(
    selector=selector,
    features=features,
    metrics=target,
    split=split,
    **selection_options,
)

Split 0, x_train: (96, 118), x_test: (25, 118), y_train: (96, 1), y_test: (25, 1), y type: reg, has inner_split: True
Split 1, x_train: (97, 118), x_test: (24, 118), y_train: (97, 1), y_test: (24, 1), y type: reg, has inner_split: True
Split 2, x_train: (97, 118), x_test: (24, 118), y_train: (97, 1), y_test: (24, 1), y type: reg, has inner_split: True
Split 3, x_train: (97, 118), x_test: (24, 118), y_train: (97, 1), y_test: (24, 1), y type: reg, has inner_split: True
Split 4, x_train: (97, 118), x_test: (24, 118), y_train: (97, 1), y_test: (24, 1), y type: reg, has inner_split: True


In [22]:
# Selected features per fold: a dict mapping the fold index to the list of
# selected feature names for that fold.
selector_results

{0: ['f__pymfe.model-based.nodes_per_inst',
  'f__pymfe.landmarking.naive_bayes.min',
  'f__pymfe.landmarking.naive_bayes.mean',
  'f__pymfe.statistical.can_cor.min',
  'f__pymfe.landmarking.one_nn.mean',
  'f__pymfe.model-based.leaves_corrob.max',
  'f__pymfe.landmarking.linear_discr.mean',
  'f__pymfe.statistical.can_cor.mean',
  'f__pymfe.model-based.nodes_per_attr',
  'f__pymfe.general.nr_inst',
  'f__pymfe.general.nr_attr',
  'f__pymfe.model-based.var_importance.min',
  'f__pymfe.model-based.nodes',
  'f__pymfe.model-based.tree_depth.mean',
  'f__pymfe.statistical.g_mean.mean',
  'f__pymfe.info-theory.class_ent',
  'f__pymfe.model-based.nodes_repeated.skewness',
  'f__pymfe.general.freq_class.sd',
  'f__pymfe.model-based.tree_imbalance.max'],
 1: ['f__pymfe.model-based.nodes_per_inst',
  'f__pymfe.landmarking.naive_bayes.min',
  'f__pymfe.landmarking.naive_bayes.mean',
  'f__pymfe.statistical.can_cor.min',
  'f__pymfe.landmarking.one_nn.mean',
  'f__pymfe.model-based.leaves_corrob

In [None]:
N_TRIALS = 10  # Number of optimization trials

# Scoring settings for the regression task, taken from the shared config.
scoring_options = {
    "opt_scoring": ExperimentConfig.OPT_SCORING_REG,
    "model_scoring": ExperimentConfig.MODEL_SCORING_REG,
}

# Fit and evaluate the meta-model on every fold, restricting each fold to the
# features picked by the selector for that fold.
res = model.run(
    x=features,
    y=target,
    split=split,
    n_trials=N_TRIALS,
    preprocessor=ExperimentConfig.PREPROCESSOR,
    subset=selector_results,  # selected features per fold
    save_path=None,  # Not saving in this example
    **scoring_options,
)

In [26]:
# Show the meta-model scores: one row per metric (mae, rmse, r2) and one
# train_<i> / test_<i> column pair per fold.
res

Unnamed: 0,train_0,test_0,train_1,test_1,train_2,test_2,train_3,test_3,train_4,test_4
mae,0.087048,0.07203,0.098855,0.076991,0.085523,0.122128,0.00045,0.082254,0.082688,0.138686
rmse,0.12819,0.110265,0.138085,0.085375,0.116711,0.170926,0.003132,0.118009,0.115043,0.169165
r2,0.308848,0.415263,0.288303,-1.608312,0.328891,0.15778,0.999597,0.17251,0.345423,0.158309
