In [1]:
from optbinning import OptimalBinning
import numpy as np
import pandas as pd

# Seed the global NumPy stream so every draw below is reproducible
np.random.seed(42)


def _draw_feature_columns(n_rows):
    """Draw one synthetic column per feature, ``n_rows`` values each.

    The dict literal is evaluated top to bottom, so the global NumPy random
    stream is consumed in exactly this column order on every call.
    """
    return {
        "numeric_feature1": np.random.uniform(0, 100, n_rows),
        "numeric_feature2": np.random.uniform(10, 50, n_rows),
        "categorical_feature1": np.random.choice(["A", "B", "C"], size=n_rows),
        "categorical_feature2": np.random.choice(["X", "Y"], size=n_rows),
        "boolean_feature": np.random.choice([True, False], size=n_rows),
        "ordinal_feature": np.random.choice([1, 2, 3, 4], size=n_rows),
    }


# Example training dataset: the six features first, then the binary target
train_data = pd.DataFrame({
    **_draw_feature_columns(1000),
    "target": np.random.randint(0, 2, 1000),
})

# Unlabelled dataset (simulating unseen test data), drawn after the training set
new_data = pd.DataFrame(_draw_feature_columns(500))

# Split the training frame into the feature matrix and the target vector
y_train = train_data["target"]
X_train = train_data.drop("target", axis=1)

# Dictionary to store trained binning models, keyed by feature name
binning_models = {}


def _binning_config_for(feature, feature_dtype):
    """Pick the OptimalBinning ``dtype`` and ``monotonic_trend`` for a column.

    pandas' dtype predicates are used instead of ``np.issubdtype`` because the
    latter raises ``TypeError`` when handed a pandas extension dtype (for
    example the dedicated string dtype), which would hide the intended
    ``ValueError`` below and break on string columns stored that way.

    Parameters
    ----------
    feature : str
        Column name; only used in the error message.
    feature_dtype : numpy.dtype or pandas extension dtype
        The dtype of the column.

    Returns
    -------
    tuple
        ``("categorical", None)`` for boolean, object and string columns,
        ``("numerical", "auto")`` for numeric columns (ordinal codes included).

    Raises
    ------
    ValueError
        If the dtype is none of the supported kinds.
    """
    dtype_checks = pd.api.types
    # Booleans must be tested first: pandas' is_numeric_dtype also accepts bool.
    if dtype_checks.is_bool_dtype(feature_dtype):
        return "categorical", None
    if dtype_checks.is_numeric_dtype(feature_dtype):
        return "numerical", "auto"
    if dtype_checks.is_object_dtype(feature_dtype) or dtype_checks.is_string_dtype(feature_dtype):
        return "categorical", None
    raise ValueError(f"Unsupported feature type: {feature_dtype} for feature '{feature}'")


# Train one binning model per feature on the training data
for feature in X_train.columns:
    # Detect feature type
    feature_dtype = X_train[feature].dtype
    binning_type, monotonic_trend = _binning_config_for(feature, feature_dtype)

    # Fit binning model
    opt_binning = OptimalBinning(name=feature, dtype=binning_type, solver="cp", monotonic_trend=monotonic_trend)
    opt_binning.fit(X_train[feature], y_train)

    # Store trained binning model
    binning_models[feature] = opt_binning

# Function to apply trained bins to a new dataset
def apply_binning_models(new_data, binning_models):
    """Encode columns of ``new_data`` with previously fitted binning models.

    Parameters
    ----------
    new_data : pandas.DataFrame
        Data to encode. Columns without an entry in ``binning_models`` are
        ignored.
    binning_models : dict
        Maps a feature name to a fitted model exposing
        ``transform(values, metric="woe")``. Entries whose feature is not a
        column of ``new_data`` are skipped.

    Returns
    -------
    pandas.DataFrame
        One ``"<feature>_woe"`` column per feature present in both inputs, in
        the iteration order of ``binning_models``, indexed like ``new_data``.
    """
    woe_columns = {
        # np.asarray keeps the values positional even if a model returns a
        # labelled container, so they cannot be re-aligned against the index.
        f"{feature}_woe": np.asarray(model.transform(new_data[feature], metric="woe"))
        for feature, model in binning_models.items()
        if feature in new_data.columns
    }
    # Build the frame once, on the input's index, so rows stay aligned with
    # new_data instead of being renumbered 0..n-1.
    return pd.DataFrame(woe_columns, index=new_data.index)

# Apply the trained binning models to the new dataset
new_woe_df = apply_binning_models(new_data, binning_models)

# Display the first few rows of the transformed dataset
print(new_woe_df.head())


  from pandas.core import (


(CVXPY) Feb 12 03:31:03 PM: Encountered unexpected exception importing solver GLOP:
RuntimeError('Unrecognized new version of ortools (9.11.4210). Expected < 9.10.0. Please open a feature request on cvxpy to enable support for this version.')
(CVXPY) Feb 12 03:31:03 PM: Encountered unexpected exception importing solver PDLP:
RuntimeError('Unrecognized new version of ortools (9.11.4210). Expected < 9.10.0. Please open a feature request on cvxpy to enable support for this version.')
   numeric_feature1_woe  numeric_feature2_woe  categorical_feature1_woe  \
0              0.039567             -0.322896                  0.037823   
1              0.039567              0.055338                 -0.099822   
2             -0.055267             -0.093279                  0.037823   
3              0.039567             -0.093279                  0.037823   
4             -0.104586              0.694702                  0.037823   

   categorical_feature2_woe  boolean_feature_woe  ordinal_featu

In [2]:

# Function to apply trained bins to a new dataset
def apply_binning_models(new_data, binning_models):
    """Encode columns of ``new_data`` with previously fitted binning models.

    Parameters
    ----------
    new_data : pandas.DataFrame
        Data to encode. Columns without an entry in ``binning_models`` are
        ignored.
    binning_models : dict
        Maps a feature name to a fitted model exposing
        ``transform(values, metric="woe")``. Entries whose feature is not a
        column of ``new_data`` are skipped.

    Returns
    -------
    pandas.DataFrame
        One ``"<feature>_woe"`` column per feature present in both inputs, in
        the iteration order of ``binning_models``, indexed like ``new_data``.
    """
    woe_columns = {
        # np.asarray keeps the values positional even if a model returns a
        # labelled container, so they cannot be re-aligned against the index.
        f"{feature}_woe": np.asarray(model.transform(new_data[feature], metric="woe"))
        for feature, model in binning_models.items()
        if feature in new_data.columns
    }
    # Build the frame once, on the input's index, so rows stay aligned with
    # new_data instead of being renumbered 0..n-1.
    return pd.DataFrame(woe_columns, index=new_data.index)

# Apply the trained binning models to the new dataset
new_woe_df = apply_binning_models(new_data, binning_models)

# Display the first few rows of the transformed dataset
print(new_woe_df.head())


   numeric_feature1_woe  numeric_feature2_woe  categorical_feature1_woe  \
0              0.039567             -0.322896                  0.037823   
1              0.039567              0.055338                 -0.099822   
2             -0.055267             -0.093279                  0.037823   
3              0.039567             -0.093279                  0.037823   
4             -0.104586              0.694702                  0.037823   

   categorical_feature2_woe  boolean_feature_woe  ordinal_feature_woe  
0                 -0.021234            -0.012774            -0.012833  
1                  0.022034             0.013039            -0.012833  
2                 -0.021234            -0.012774            -0.072308  
3                  0.022034             0.013039             0.086297  
4                  0.022034            -0.012774            -0.072308  


NameError: name 'binning_model' is not defined

In [None]:
# Variant of the transform that also records each row's bin label next to its WoE value

def apply_binning_models_with_labels(new_data, binning_models):
    """Encode columns of ``new_data`` as bin labels and WoE values.

    Parameters
    ----------
    new_data : pandas.DataFrame
        Data to encode. Columns without an entry in ``binning_models`` are
        ignored.
    binning_models : dict
        Maps a feature name to a fitted model exposing
        ``transform(values, metric=...)``. Entries whose feature is not a
        column of ``new_data`` are skipped.

    Returns
    -------
    pandas.DataFrame
        For every feature present in both inputs, a ``"<feature>_bin_label"``
        column followed by a ``"<feature>_woe"`` column, indexed like
        ``new_data``.
    """
    columns = {}

    for feature, model in binning_models.items():
        if feature not in new_data.columns:
            continue

        values = new_data[feature]
        # "bins" is the metric name optbinning uses for bin labels; the former
        # "binning" is not an accepted value and makes transform() raise.
        columns[f"{feature}_bin_label"] = np.asarray(model.transform(values, metric="bins"))
        columns[f"{feature}_woe"] = np.asarray(model.transform(values, metric="woe"))

    # Build the frame once, on the input's index, so rows stay aligned with
    # new_data instead of being renumbered 0..n-1.
    return pd.DataFrame(columns, index=new_data.index)
