## Prepare the training and test data

In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [2]:
# Load the breast-cancer dataset as pandas objects (feature frame + target series).
X, y = datasets.load_breast_cancer(return_X_y=True, as_frame=True)

# Keep 70% of the rows for training; the remainder becomes the test split.
# The fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=335,
)

## Make a HyperGBM experiment (default settings, `precision` as the reward metric) and run it

In [3]:
from hypergbm import make_experiment

In [4]:
%%time
train_data = pd.concat([X_train,y_train],axis=1)
experiment = make_experiment(train_data, target='target', reward_metric='precision')
estimator = experiment.run()

ExperimentProcessWidget(initData='{"steps": [{"index": 0, "name": "data_clean", "type": "DataCleanStep", "stat…

CPU times: user 3min 16s, sys: 10.9 s, total: 3min 27s
Wall time: 16 s


In [5]:
# Show the estimator returned by experiment.run() (bare expression -> rich repr).
estimator

Pipeline(steps=[('data_clean',
                 DataCleanStep(cv=True,
                               data_cleaner_args={'correct_object_dtype': True,
                                                  'drop_columns': None,
                                                  'drop_constant_columns': True,
                                                  'drop_duplicated_columns': False,
                                                  'drop_idness_columns': True,
                                                  'drop_label_nan_rows': True,
                                                  'int_convert_to': 'float',
                                                  'nan_chars': None,
                                                  'reduce_mem_usage': False,
                                                  'reserve_columns': None},
                               name='data_clean')),
                ('est...
                 GreedyEnsemble(weight=[0.4 0.6 0.  0.  0.  0.  0.  0.  0. ], 

## Export the trained model

In [6]:
import pickle

# Serialize the trained estimator to 'model.pkl' in binary mode.
# NOTE: only unpickle files from a trusted source; pickle.load can execute arbitrary code.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(estimator, model_file)

## Scoring with sklearn

In [7]:
from sklearn.metrics import get_scorer

# Score the estimator on the held-out split with the same metric that was used
# as the experiment's reward metric.
scorer=get_scorer('precision')
# Other scorer names can be swapped in above, e.g. 'accuracy', 'recall' or 'roc_auc_ovo'.
score = scorer(estimator,X_test,y_test)
score

0.9652173913043478

In [8]:
from sklearn.metrics import classification_report

# Predicted labels for the held-out split.
y_pred=estimator.predict(X_test)
# The report is a pre-formatted multi-line string, so it is printed rather than
# left as the cell's last expression (which would show the escaped repr).
print(classification_report(y_test, y_pred, digits=5))

              precision    recall  f1-score   support

           0    0.96429   0.93103   0.94737        58
           1    0.96522   0.98230   0.97368       113

    accuracy                        0.96491       171
   macro avg    0.96475   0.95667   0.96053       171
weighted avg    0.96490   0.96491   0.96476       171



## Scoring with Hypernets.tabular toolbox

In [9]:
from hypernets.tabular.metrics import calc_score

# calc_score is given both the predicted labels and the predicted probabilities
# (presumably the probabilities feed 'auc' -- confirm against the Hypernets docs).
preds = estimator.predict(X_test)
proba = estimator.predict_proba(X_test)

# Compute several binary-classification metrics in one call; the result is
# displayed as the cell output.
scores = calc_score(
    y_test,
    preds,
    proba,
    task='binary',
    metrics=['auc', 'accuracy', 'f1', 'recall', 'precision'],
)
scores

{'auc': 0.9835215135794935,
 'accuracy': 0.9649122807017544,
 'f1': 0.9736842105263158,
 'recall': 0.9823008849557522,
 'precision': 0.9652173913043478}