# Dataset

In [None]:
from docs_jpmml_org import fetch_bank_marketing

# Load the bank marketing dataset through the project helper.
dataset = fetch_bank_marketing()
#print(dataset)

# X and y are the two values later handed to pipeline.fit(X, y).
X, y = dataset.getData()

# Column groupings as reported by the dataset object; the workflow cells
# below use them to route columns to different preprocessing steps.
categorical_cols = dataset.getCategoricalColumns()
continuous_cols = dataset.getContinuousColumns()

# Workflow

## Option A: ordinal encoding

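Recommended for NumPy data: the categorical columns are ordinal-encoded, and then declared to LightGBM explicitly via fit parameters.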

In [None]:
from lightgbm import LGBMClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder

# Continuous columns are passed through untouched; categorical columns are
# ordinal-encoded. The continuous block is listed first, so the categorical
# columns come after it in the transformed output.
transformer = ColumnTransformer(
    transformers = [
        ("continuous", "passthrough", continuous_cols),
        ("categorical", OrdinalEncoder(), categorical_cols),
    ]
)

# Alternative: identify the categorical columns by position (offset by the
# number of continuous columns that precede them)
#categorical_feature = [len(continuous_cols) + categorical_idx for categorical_idx in range(0, len(categorical_cols))]

# Identify the categorical columns by name; feature_name lists all columns
# in the same order as the transformer emits them
feature_name = continuous_cols + categorical_cols
categorical_feature = categorical_cols

classifier = LGBMClassifier(random_state = 42)

pipeline = Pipeline(
    steps = [
        ("transformer", transformer),
        ("classifier", classifier),
    ]
)

# The "classifier__" prefix addresses the fit parameters to the "classifier" step.
# Index-based variant (pairs with the commented-out categorical_feature above):
#pipeline.fit(X, y, classifier__categorical_feature = categorical_feature)
pipeline.fit(
    X,
    y,
    **{
        "classifier__feature_name": feature_name,
        "classifier__categorical_feature": categorical_feature,
    },
)

## Option B: casting

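Recommended for pandas data: the categorical columns are cast to the `category` data type, and the transformer is configured to output a pandas DataFrame.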

In [None]:
from lightgbm import LGBMClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn2pmml.preprocessing import CastTransformer

# Continuous columns are passed through as one block; every categorical
# column gets its own cast-to-"category" step, named after the column.
# The transformer is switched to pandas output so the result stays a DataFrame.
transformer = ColumnTransformer(
    transformers = [
        ("continuous", "passthrough", continuous_cols),
        *(
            (col, CastTransformer("category"), [col])
            for col in categorical_cols
        ),
    ]
).set_output(transform = "pandas")

classifier = LGBMClassifier(random_state = 42)

pipeline = Pipeline(
    steps = [
        ("transformer", transformer),
        ("classifier", classifier),
    ]
)

# No feature_name / categorical_feature fit parameters here; presumably the
# classifier is expected to pick up the category-typed columns on its own.
pipeline.fit(X, y)

# Export to PMML

In [None]:
# Write the fitted classifier's underlying booster to "Booster.txt".
# `classifier` is whichever instance the most recently run option cell created.
classifier.booster_.save_model("Booster.txt")

In [None]:
from sklearn2pmml import sklearn2pmml

# Export the fitted pipeline to the file "LightGBMPipeline.pmml".
# `pipeline` is whichever instance the most recently run option cell created.
sklearn2pmml(pipeline, "LightGBMPipeline.pmml")