In [None]:
import numpy as np

from skorecard import datasets
from skorecard.preprocessing import SimpleBucketTransformer
from skorecard.preprocessing import AgglomerativeBucketTransformer
from skorecard.preprocessing import QuantileBucketTransformer
from skorecard.preprocessing import TreeBucketTransformer

import pandas as pd

In [None]:
# Load the UCI credit card data through skorecard's `datasets` module
# (requested with as_frame=True) and preview the first 10 rows.
df = datasets.load_uci_credit_card(as_frame=True)
df.head(10)


### Simple Example

In [None]:
# Use the MARRIAGE column (taken via `.values`) as the single input feature.
X = df['MARRIAGE'].values

In [None]:
# Agglomerative bucketer configured with bin_count=2.
MyBucketTransformer = AgglomerativeBucketTransformer(bin_count=2)

In [None]:
# Fit the bucketer on X only; no target is passed.
MyBucketTransformer.fit(X)

In [None]:
# Apply the fitted bucketer to the same data it was fitted on.
MyBucketTransformer.transform(X)

In [None]:
# Inspect the `BucketDict` attribute of the fitted bucketer.
MyBucketTransformer.BucketDict

### Practical Example with FeatureUnion

In [None]:
from sklearn.pipeline import Pipeline, FeatureUnion
from skorecard.pipeline import ColumnSelector

In [None]:
# One branch per feature group: each branch first selects its column(s) and,
# except for the 'dummy' branch, then applies a bucketer to them.
feature_branches = [
    ('simple', Pipeline([
        ('selector', ColumnSelector(['EDUCATION'])),
        ('bucketer', SimpleBucketTransformer(bin_count=4)),
    ])),
    ('agglom', Pipeline([
        ('selector', ColumnSelector(['MARRIAGE'])),
        ('bucketer', AgglomerativeBucketTransformer(bin_count=2)),
    ])),
    ('quantile', Pipeline([
        ('selector', ColumnSelector(['LIMIT_BAL'])),
        ('bucketer', QuantileBucketTransformer(bin_count=10)),
    ])),
    # Selection only: these columns go through without a bucketer step.
    ('dummy', Pipeline([
        ('selector', ColumnSelector(['BILL_AMT1', 'default'])),
    ])),
]

# Combine the branches in a FeatureUnion and wrap it as the single step of the
# outer pipeline.
bucketed_features = FeatureUnion(n_jobs=1, transformer_list=feature_branches)
transformer = Pipeline([
    ('bucketed_features', bucketed_features),
])


In [None]:
# Fit the whole pipeline on the DataFrame and show the transformed output.
transformer.fit_transform(df)

### Practical Example with ColumnTransformer

In [None]:
from skorecard.preprocessing import ManualBucketTransformer
from sklearn.compose import ColumnTransformer 

In [None]:
# Reload the dataset so this section starts from a fresh DataFrame.
df = datasets.load_uci_credit_card(as_frame=True)

# (name, bucketer, column positions) triples. The positions refer to columns of
# the array handed to the ColumnTransformer.
# NOTE(review): bin_count=[10, 6] presumably gives one bin count per selected
# column (2 and 3) -- confirm against QuantileBucketTransformer.
column_bucketers = [
    ('simple', SimpleBucketTransformer(bin_count=2), [1]),
    ('agglom', AgglomerativeBucketTransformer(bin_count=4), [0]),
    ('quantile', QuantileBucketTransformer(bin_count=[10, 6]), [2, 3]),
]

# remainder='passthrough' keeps the columns not listed above in the output.
transformer = ColumnTransformer(
    transformers=column_bucketers,
    remainder='passthrough',
)

In [None]:
# Fit and transform on the raw array (`df.values`), since the transformers
# above address their columns by integer position.
transformer.fit_transform(df.values)

In [None]:
# Inspect the `BucketDict` of the fitted 'quantile' transformer.
transformer.named_transformers_.quantile.BucketDict

In [None]:
# Show all fitted transformers, keyed by the names given in `transformers`.
transformer.named_transformers_

### Manual Bucket Transformer

In [None]:
# Collect the fitted bucket boundaries of every bucketer, keyed by the column
# position that the bucketer was applied to in the ColumnTransformer.
example_boundary_dict = {}

# Walk the user-specified (name, estimator, columns) triples and look each
# fitted transformer up by name. This avoids relying on `named_transformers_`
# ending with a "remainder" entry (the old `len(...) - 1` slice) and on its key
# order matching `transformer.transformers`.
for name, _estimator, columns in transformer.transformers:
    bucket_dict = transformer.named_transformers_[name].BucketDict
    # Each bucketer's BucketDict is keyed 'Feature_<k>', where k is the position
    # of the column within that bucketer's own column list.
    for position, column in enumerate(columns):
        example_boundary_dict[column] = bucket_dict[f'Feature_{position}'].boundaries

In [None]:
# Display the collected mapping of column position -> bucket boundaries.
example_boundary_dict

In [None]:
# Build a manual bucketer from the boundaries collected in `example_boundary_dict`.
MBT = ManualBucketTransformer(boundary_dict=example_boundary_dict)

In [None]:
# Work on a copy of the DataFrame, then fit and transform on its raw array;
# `a` holds the transformed result.
X = df.copy()
MBT.fit(X.values)
a = MBT.transform(X.values)

In [None]:
# Distinct values found in column 3 of the transformed array.
np.unique(a[:,3])

### Tree Bucket Transformer

In [None]:
from skorecard.preprocessing import TreeBucketTransformer

In [None]:
# Feature (LIMIT_BAL) and target ('default') arrays for the tree bucketer,
# which is fitted with both X and y.
X = df['LIMIT_BAL'].values
y = df['default'].values

In [None]:
# Tree-based bucketer; every setting is passed explicitly as a keyword.
tbt = TreeBucketTransformer(
    inf_edges=False,
    max_depth=2,
    criterion='entropy',
    min_samples_leaf=2000,  # minimum number of entries in the bins
    min_impurity_decrease=0.001,
)

In [None]:
# Fit the tree bucketer; unlike the earlier bucketers it is given the target y.
tbt.fit(X, y)

In [None]:
# Apply the fitted tree bucketer to the feature it was fitted on.
tbt.transform(X)

### Categorical Transformer

In [None]:
from skorecard.preprocessing import CatBucketTransformer

In [None]:
# Feature (EDUCATION) and target ('default') arrays for the categorical bucketer.
X = df['EDUCATION'].values
y = df['default'].values

In [None]:
# Count how many EDUCATION categories each cover more than 20% of the rows.
(df['EDUCATION'].value_counts(normalize=True) > 0.2).sum()

In [None]:
# Distinct EDUCATION values before bucketing.
np.unique(X)

In [None]:
# Categorical bucketer configured with explicit thresholds and epsilon.
# NOTE(review): the meaning of threshold_min / threshold_max / epsilon is not
# visible here -- see the CatBucketTransformer documentation.
cbt = CatBucketTransformer(threshold_min=0.01, threshold_max=0.2, epsilon=0.05)

In [None]:
# Fit the categorical bucketer on the feature together with the target.
cbt.fit(X, y)

In [None]:
# Replace X with its transformed version.
# NOTE(review): this rebinds X, so re-running this cell feeds already-transformed
# values back into `cbt.transform`; re-run the cell that defines X first.
X = cbt.transform(X)

In [None]:
# Distinct values of X after the categorical transform.
np.unique(X)

In [None]:
# Inspect the `mapping_dict` attribute of the fitted categorical bucketer.
cbt.mapping_dict