In [None]:
import numpy as np

from skorecard import datasets
from skorecard.bucketing.bucketing import BucketTransformer as BT

In [None]:
# Load the UCI credit card data through skorecard's `datasets` helper
# (as_frame=True) and preview the first 10 rows.
df = datasets.load_uci_credit_card(as_frame=True)
df.head(10)

In [None]:
# Two-column feature matrix as a plain NumPy array:
# column 0 is LIMIT_BAL, column 1 is BILL_AMT1.
X = df[["LIMIT_BAL", "BILL_AMT1"]].to_numpy()

## Simple Transformer

In [None]:
# Bucketing transformer configured with bin_count=5 and the 'simple' method.
# NOTE(review): the exact binning rule behind 'simple' is defined in skorecard — confirm there.
MyBucketTransformer = BT(bin_count=5, method='simple')

In [None]:
# Fit the transformer on X (the LIMIT_BAL and BILL_AMT1 columns).
MyBucketTransformer.fit(X)

In [None]:
# Apply the fitted transformer to the same data; the result is inspected in the cells below.
X_transformed = MyBucketTransformer.transform(X)

In [None]:
# Occurrences of each integer value in the transformed second column
# (index 1, BILL_AMT1), i.e. how many rows fall under each label.
np.bincount(X_transformed[:, 1].astype(int))

In [None]:
# Preview only the first 10 transformed rows (mirrors df.head(10) above)
# instead of echoing the whole array.
X_transformed[:10]

In [None]:
# Display the fitted transformer's BucketDict attribute.
# NOTE(review): presumably holds the learned bucket definitions per feature — confirm in skorecard.
MyBucketTransformer.BucketDict

## Agglomerative Transformer

In [None]:
# Rebind MyBucketTransformer to a new instance using the 'agglomerative' method,
# keeping bin_count=5 so the result is comparable with the previous section.
MyBucketTransformer = BT(bin_count=5, method='agglomerative')

In [None]:
# Fit the agglomerative-method transformer on the same two-column X.
MyBucketTransformer.fit(X)

In [None]:
# Overwrites X_transformed with the output of the agglomerative-method transformer.
X_transformed = MyBucketTransformer.transform(X)

In [None]:
# Occurrences of each integer value in the transformed second column
# (index 1, BILL_AMT1) under the 'agglomerative' method.
np.bincount(X_transformed[:, 1].astype(int))

## Quantile Transformer

In [None]:
# Rebind MyBucketTransformer to a new instance using the 'quantile' method,
# again with bin_count=5.
MyBucketTransformer = BT(bin_count=5, method='quantile')

In [None]:
# Fit the quantile-method transformer on the same two-column X.
MyBucketTransformer.fit(X)

In [None]:
# Overwrites X_transformed with the output of the quantile-method transformer.
X_transformed = MyBucketTransformer.transform(X)

In [None]:
# Occurrences of each integer value in the transformed second column
# (index 1, BILL_AMT1) under the 'quantile' method.
np.bincount(X_transformed[:, 1].astype(int))

## Example Pipeline

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [None]:
# Load the dataset again with the same loader call used at the top of the
# notebook, so the pipeline example starts from a fresh frame; preview 10 rows.
df = datasets.load_uci_credit_card(as_frame=True)
df.head(10)

In [None]:
# Features (four columns) and target (the "default" column) as NumPy arrays.
# Note: this rebinds X, which earlier sections used for a two-column matrix.
X, y = (
    df[["EDUCATION", "MARRIAGE", "LIMIT_BAL", "BILL_AMT1"]].values,
    df["default"].values,
)

# Hold out 25% of the rows for evaluation. The seed is fixed so that the split,
# and therefore the score reported at the end, is identical on every
# Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [None]:
# Logistic regression with scikit-learn's default hyperparameters;
# used as the final step of the pipeline built below.
lr = LogisticRegression()

In [None]:
# Two-step pipeline: the BT bucketing transformer (bin_count=2, 'simple' method)
# runs first and its output is fed to the logistic regression.
pipe = Pipeline(
    steps=[
        ("Bucket", BT(bin_count=2, method="simple")),
        ("logistic", lr),
    ]
)

In [None]:
# Fit both pipeline steps on the training split only; the test split stays unseen.
pipe.fit(X_train, y_train)

In [None]:
# ROC AUC needs a continuous score to rank the samples. Passing hard 0/1 labels
# from predict() reduces the ROC curve to a single threshold and misstates the
# AUC, so score with the predicted probability of the positive class instead
# (column 1 of predict_proba, i.e. the class with the greater label).
roc_auc_score(y_test, pipe.predict_proba(X_test)[:, 1])