# Feature Draft LightGBM Example

Currently supported LightGBM estimators:
- `lightgbm.LGBMClassifier`
    - Binary classification with AUC metric only
- `lightgbm.LGBMRegressor`
    - Standard regression with RMSE metric only

In [1]:
import lightgbm as lgbm
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer, load_diabetes

import feature_draft as fd

## Binary Classification

In [2]:
# Load the breast cancer dataset bundled with scikit-learn
data = load_breast_cancer()

In [3]:
# Assemble the modelling frame: predictor columns plus a "response" target column
df = pd.DataFrame(
    data=data["data"],
    columns=data["feature_names"],
).assign(response=data["target"])

# Every predictor column is offered to the draft as a candidate feature
features = [*data["feature_names"]]

# Classifier to be evaluated, constructed with a fixed seed
lgbm_model = lgbm.LGBMClassifier(seed=10)

In [4]:
# Instantiate FeatureDraft object with the estimator, the dataframe,
# the candidate feature names, and the name of the target column
feature_drafter = fd.FeatureDraft(
    model=lgbm_model,
    data=df,
    features=features,
    response="response",
)

In [5]:
# Draft features; progress for each draft round is printed (see output below)
feature_drafter.draft_features()

Baseline Metric: 0.5

Draft Round: 1
Feature Selected: mean concave points,
Metric Improvement: 0.45977
New Metric: 0.95977

Draft Round: 2
Feature Selected: worst radius,
Metric Improvement: 0.022381
New Metric: 0.98215

Draft Round: 3
Draft finished, final feature list: ['mean concave points', 'worst radius']


In [6]:
# Show the features selected by the draft
feature_drafter.selected_features

['mean concave points', 'worst radius']

In [7]:
# Show metrics from the final feature list cross-validation run
# (five values in the output below; presumably one per CV fold, TODO confirm)
feature_drafter.best_metrics

[0.9618408123157549,
 0.9849328529315428,
 0.9871031746031745,
 0.9867724867724866,
 0.9901073105298457]

## Regression

In [8]:
# Load the diabetes dataset bundled with scikit-learn
data = load_diabetes()

In [9]:
# Assemble the modelling frame: predictor columns plus a "response" target column
df = pd.DataFrame(
    data=data["data"],
    columns=data["feature_names"],
).assign(response=data["target"])

# Every predictor column is offered to the draft as a candidate feature
features = [*data["feature_names"]]

# Regressor to be evaluated, constructed with a fixed seed
lgbm_model = lgbm.LGBMRegressor(seed=10)

In [10]:
# Instantiate FeatureDraft object with the estimator, the dataframe,
# the candidate feature names, and the name of the target column
feature_drafter = fd.FeatureDraft(
    model=lgbm_model,
    data=df,
    features=features,
    response="response",
)

In [11]:
# Draft features; progress for each draft round is printed (see output below)
feature_drafter.draft_features()

Baseline Metric: 5929.884896910383

Draft Round: 1
Feature Selected: bmi,
Metric Improvement: -2024.2
New Metric: 3905.7

Draft Round: 2
Draft finished, final feature list: ['bmi']


In [12]:
# Show metrics from the final feature list cross-validation run
feature_drafter.best_metrics

[3752.6637758049064,
 4209.69759101858,
 3864.3481393976767,
 3648.974427414406,
 4052.7142400054217]