In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# library source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
from skorecard.bucket_mapping import BucketMapping


In [3]:
# Categorical example with a deliberately partial map: "motorcycle" is not in the
# map, "truck" is declared as a special value, and the trailing NaN is a missing value.
x = ["car", "motorcycle", "boat", "truck", "truck", np.nan]
bucket = BucketMapping(
    feature_name="feature1",
    type="categorical",
    map={"car": 0, "boat": 0},
    specials={"special": ["truck"]},
)

In [4]:
# Apply the categorical mapping to the raw values and display the resulting bucket indices.
bins = bucket.transform(x)
bins

0    0
1    1
2    0
3    2
4    2
5    3
dtype: int64

In [5]:
# Lookup table from bucket index to the label stored at the same position in bucket.labels.
remap = dict(enumerate(bucket.labels))
remap

{0: ['boat', 'car'], 1: 'other', 2: 'special', 3: 'Missing'}

In [6]:
# Translate bucket indices into their labels. Wrapping in pd.Series keeps this cell
# working whether transform() hands back a Series or a plain ndarray, and matches
# how the numerical example performs the same lookup.
pd.Series(bins).map(remap)

0    [boat, car]
1          other
2    [boat, car]
3        special
4        special
5        Missing
dtype: object

In [7]:
# Numerical example: boundaries at 3 and 4, the value 2 declared as a special,
# and a trailing NaN as a missing value.
x = [0, 1, 2, 3, 4, 5, 2, np.nan]
bucket = BucketMapping(
    feature_name="feature1",
    type="numerical",
    map=[3, 4],
    specials={"special": [2]},
)
bins = bucket.transform(x)
bins

array([0, 0, 3, 0, 1, 2, 3, 4])

In [8]:
# Lookup table from bucket index to the label stored at the same position in bucket.labels.
remap = dict(enumerate(bucket.labels))
remap

{0: '(-inf, 3.0]',
 1: '(3.0, 4.0]',
 2: '(4.0, inf]',
 3: 'special',
 4: 'Missing'}

In [9]:
# bins is wrapped in a Series so that .map can translate each bucket index into its label.
pd.Series(bins).map(remap)

0    (-inf, 3.0]
1    (-inf, 3.0]
2        special
3    (-inf, 3.0]
4     (3.0, 4.0]
5     (4.0, inf]
6        special
7        Missing
dtype: object

# Testing the code

### Use case

# Run the tests

In [10]:
import pytest

In [31]:
from skorecard.datasets import load_uci_credit_card

# Load the UCI credit card data as a frame (as_frame=True); later cells select
# feature columns and the "default" target column from it.
df = load_uci_credit_card(as_frame=True)

In [80]:
from skorecard.bucketers import OrdinalCategoricalBucketer, OptimalBucketer

# Single-column feature frame and the target used to fit the categorical bucketer.
X = df[["EDUCATION"]]
y = df["default"]

# NOTE(review): the special is labelled "ed 0" but holds the value 1 — confirm which is intended.
ocb = OrdinalCategoricalBucketer(
    tol=0.03,
    variables=["EDUCATION"],
    encoding_method="ordered",
    specials={"ed 0": [1]},
)
ocb.fit(X, y)

OrdinalCategoricalBucketer(encoding_method='ordered', specials={'ed 0': [1]},
                           tol=0.03, variables=['EDUCATION'])

In [81]:
# Display the single-column EDUCATION frame that the bucketer was fitted on.
X

Unnamed: 0,EDUCATION
0,1
1,2
2,1
3,1
4,2
...,...
5995,2
5996,2
5997,1
5998,2


In [82]:
# Rebind X to a three-column frame; only EDUCATION is listed in `variables` below.
X = df[["LIMIT_BAL", "BILL_AMT1", "EDUCATION"]]
y = df["default"].values

# Treat EDUCATION as categorical and declare the raw value 0 as a special.
obt = OptimalBucketer(
    variables=["EDUCATION"],
    variables_type="categorical",
    specials={"special_one": [0]},
)
obt.fit(X, y)

OptimalBucketer(specials={'special_one': [0]}, variables=['EDUCATION'],
                variables_type='categorical')

In [83]:
# Inspect the fitted raw-value -> bucket-index map for EDUCATION.
obt.features_bucket_mapping_["EDUCATION"].map

{1: 0, 3: 1, 2: 2, 5: 3, 4: 3, 6: 3, 0: 3}

In [87]:
# Count the transformed EDUCATION rows whose raw value equals the special value 0.
education_buckets = obt.transform(X)["EDUCATION"]
education_is_zero = X["EDUCATION"] == 0
len(education_buckets[education_is_zero])

1

In [85]:
# Frequency of each raw EDUCATION value, for comparison with the fitted bucket map.
X['EDUCATION'].value_counts()

2    2725
1    2186
3    1013
5      51
4      13
6      11
0       1
Name: EDUCATION, dtype: int64

In [79]:
# NOTE(review): same expression as the In [83] cell above, with an older execution
# count — likely a leftover cell that can be removed.
obt.features_bucket_mapping_["EDUCATION"].map

{1: 0, 3: 1, 2: 2, 5: 3, 4: 3, 6: 3, 0: 3}

In [39]:
# Inspect the BucketMapping fitted by the ordinal categorical bucketer.
# NOTE(review): the saved output shows specials {'ed 0': [0]} while the fitting cell
# passes [1]; the output appears to be from an earlier run — re-execute to refresh it.
ocb.features_bucket_mapping_["EDUCATION"]

BucketMapping(feature_name='EDUCATION', type='categorical', map={1: 0, 3: 1, 2: 2}, right=True, specials={'ed 0': [0]}, labels=[[1], [3], [2], 'other', 'ed 0'])

In [None]:
import pytest
import numpy as np
import pandas as pd

from sklearn.pipeline import make_pipeline
from sklearn.exceptions import NotFittedError
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from skorecard.bucketers import (
    EqualWidthBucketer,
    EqualFrequencyBucketer,
    OrdinalCategoricalBucketer,
    DecisionTreeBucketer,
    OptimalBucketer,
)
from skorecard.pipeline import get_features_bucket_mapping, KeepPandas, make_coarse_classing_pipeline
from skorecard.bucket_mapping import BucketMapping

from skorecard.bucketers import OptimalBucketer

# Target vector and feature frame (every column except the label).
y = df["default"].values
X = df.drop(columns=["default"])

# A pipeline nested inside another pipeline, followed by a categorical bucketer.
numerical_stage = make_pipeline(EqualWidthBucketer(bins=5, variables=["LIMIT_BAL", "BILL_AMT1"]))
categorical_stage = OrdinalCategoricalBucketer(variables=["EDUCATION", "MARRIAGE"])
nested_pipeline = make_pipeline(numerical_stage, categorical_stage)

# Asking for the bucket mapping before fitting is expected to raise NotFittedError.
with pytest.raises(NotFittedError):
    get_features_bucket_mapping(nested_pipeline)

# After fitting, the same call returns the mapping collected from the pipeline.
nested_pipeline.fit(X, y)
bm = get_features_bucket_mapping(nested_pipeline)


In [None]:
# Look up the EDUCATION entry in the mapping extracted from the nested pipeline.
bm.get("EDUCATION")

In [None]:
# The mapping recovered from the fitted pipeline must equal this hand-written one.
expected_education_mapping = BucketMapping(
    feature_name="EDUCATION",
    type="categorical",
    map={2: 1, 1: 2, 3: 3},
    right=True,
)
assert bm.get("EDUCATION") == expected_education_mapping

In [None]:
# Look up the EDUCATION entry fitted by the OptimalBucketer, for comparison with the pipeline's.
obt.features_bucket_mapping_.get("EDUCATION")

In [None]:
# NOTE(review): X_trans is not assigned in any visible cell, so this raises NameError
# on a fresh kernel — define it (e.g. from a transform call) or drop this cell.
X_trans['EDUCATION']

In [None]:
# Relative frequency of each raw EDUCATION value (normalize=True yields proportions).
X["EDUCATION"].value_counts(normalize=True)

In [None]:
# NOTE(review): cbt is not assigned in any visible cell, so this raises NameError
# on a fresh kernel — confirm which bucketer was meant or drop this cell.
cbt.features_bucket_mapping_