# Feature Draft XGBoost Example

Currently supported XGBoost estimators:
- `xgboost.XGBClassifier`
    - Binary classification with AUC metric only
- `xgboost.XGBRegressor`
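    - Standard regression with RMSE metric only (note: the metric values printed in the regression example below appear to be on the squared-error (MSE) scale)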

In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer, load_diabetes
import xgboost as xgb

import feature_draft as fd

## Binary Classification

In [2]:
# Load scikit-learn's bundled breast cancer dataset for the binary classification example
data = load_breast_cancer()

In [3]:
# Build a dataframe holding every feature column plus the target as "response"
df = pd.DataFrame(data["data"], columns=data["feature_names"]).assign(
    response=data["target"]
)

# Candidate features for the draft: every feature column in the dataset
features = list(data["feature_names"])

# Define model object. `random_state` is the documented seed parameter of
# XGBoost's scikit-learn wrapper (older releases warn that `seed` is deprecated).
xgb_model = xgb.XGBClassifier(random_state=10)

In [4]:
# Instantiate FeatureDraft object with the estimator, the dataframe, the
# candidate feature names, and the name of the response column
feature_drafter = fd.FeatureDraft(
    model=xgb_model,
    data=df,
    features=features,
    response="response",
)

In [5]:
# Draft features: prints the baseline metric, then the feature picked and the
# resulting metric for each draft round, ending with the final feature list
feature_drafter.draft_features()

Baseline Metric: 0.5

Draft Round: 1
Feature Selected: worst perimeter,
Metric Improvement: 0.46817
New Metric: 0.96817

Draft Round: 2
Draft finished, final feature list: ['worst perimeter']


In [6]:
# Show the features selected by the draft
feature_drafter.selected_features

['worst perimeter']

In [7]:
# Show the metrics from the cross-validation run on the final feature list
feature_drafter.best_metrics

[0.9515230920406158,
 0.9606943989518506,
 0.9806547619047619,
 0.9679232804232805,
 0.9800469483568075]

## Regression

In [8]:
# Load scikit-learn's bundled diabetes dataset for the regression example
data = load_diabetes()

In [9]:
# Build a dataframe holding every feature column plus the target as "response"
df = pd.DataFrame(data["data"], columns=data["feature_names"]).assign(
    response=data["target"]
)

# Candidate features for the draft: every feature column in the dataset
features = list(data["feature_names"])

# Define model object. `random_state` is the documented seed parameter of
# XGBoost's scikit-learn wrapper (older releases warn that `seed` is deprecated).
xgb_model = xgb.XGBRegressor(random_state=10)

In [10]:
# Instantiate FeatureDraft object with the estimator, the dataframe, the
# candidate feature names, and the name of the response column
feature_drafter = fd.FeatureDraft(
    model=xgb_model,
    data=df,
    features=features,
    response="response",
)

In [11]:
# Draft features: prints the baseline metric, then the feature picked and the
# resulting metric for each draft round, ending with the final feature list
feature_drafter.draft_features()

Baseline Metric: 5929.884896910383

Draft Round: 1
Feature Selected: bmi,
Metric Improvement: -1779.3
New Metric: 4150.6

Draft Round: 2
Draft finished, final feature list: ['bmi']


In [12]:
# Show the metrics from the cross-validation run on the final feature list
feature_drafter.best_metrics

[3984.9464366753778,
 4464.200010351696,
 3939.0857322486468,
 4113.608556334869,
 4250.991421518436]