## Prepare train_data and test_data

In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from hypergbm import make_experiment
from hypernets.tabular.metrics import metric_to_scoring
from sklearn.metrics import get_scorer

In [2]:
# Load the breast-cancer dataset as pandas objects (features X, target y).
X, y = datasets.load_breast_cancer(return_X_y=True, as_frame=True)

# 70% of the rows go to training; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=335,
)

# Put the target column back next to the features so each frame is self-contained.
train_data = pd.concat([X_train, y_train], axis=1)
eval_data = pd.concat([X_test, y_test], axis=1)

# Use Feature Reselection

HyperGBM can select features from train_data with the searched estimator, then re-search with HyperGBM (two-stage search). Set `feature_reselection=True` to enable feature reselection. HyperGBM selects features with `permutation_importance` at this stage.

**Options**:

* feature_reselection : bool, (default=False)
    Whether to enable two stage feature selection with permutation importance.
* feature_reselection_estimator_size : int, (default=10)
    The number of estimators to evaluate feature importance. Only valid when *feature_reselection* is True.
* feature_reselection_strategy : str, (default='threshold')
    Strategy to reselect features (*threshold*, *number* or *quantile*).
* feature_reselection_threshold : float, (default=1e-5)
    Confidence threshold of the mean permutation importance. Only valid when *feature_reselection_strategy* is 'threshold'.
* feature_reselection_quantile:
    Confidence quantile of feature_importance. Only valid when *feature_reselection_strategy* is 'quantile'.
* feature_reselection_number:
    Expected feature number to keep. Only valid when *feature_reselection_strategy* is 'number'.
    
See [scikit-learn](https://scikit-learn.org/stable/modules/permutation_importance.html) for more details about `permutation_importance`.

In [3]:
# Build an experiment with feature reselection switched on.
# A copy of train_data is passed, not the original frame object.
experiment = make_experiment(
    train_data.copy(),
    target='target',
    random_state=8888,
    max_trials=20,
    feature_reselection=True,
)
estimator = experiment.run()

# Score the estimator returned by the run on the held-out split, using accuracy.
scorer = get_scorer(metric_to_scoring('accuracy'))
score = scorer(estimator, X_test, y_test)
score

ExperimentProcessWidget(initData='{"steps": [{"index": 0, "name": "data_clean", "type": "DataCleanStep", "stat…

0.9649122807017544