# Dataset

In [None]:
from docs_jpmml_org import fetch_bank_marketing

# Load the bank marketing dataset through the project-local helper.
dataset = fetch_bank_marketing()
# Debugging aid: uncomment to inspect the loaded dataset object.
#print(dataset)

# Features and target; both are passed to pipeline.fit(X, y) in the Pipeline cell.
# NOTE(review): X is presumably a pandas DataFrame, since the column lists below
# are used as ColumnTransformer column selectors — confirm.
X, y = dataset.getData()

# Column lists that route features to the scaler (continuous) and to the
# one-hot encoder (categorical) in the Pipeline cell.
categorical_cols = dataset.getCategoricalColumns()
continuous_cols = dataset.getContinuousColumns()

# Optional: uncomment to exclude the "duration" column from the continuous features.
# NOTE(review): presumably because call duration is only known after the contact
# has happened and so leaks the outcome — confirm against the dataset description.
#continuous_cols.remove("duration")

# Pipeline

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import f_classif, SelectKBest
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

import joblib

# Column-wise preprocessing: standardize the continuous columns, one-hot
# encode the categorical columns, and drop every column not listed.
transformer = ColumnTransformer(
    transformers=[
        ("continuous", StandardScaler(), continuous_cols),
        ("categorical", OneHotEncoder(), categorical_cols),
    ],
    remainder="drop",
)

# Univariate feature selection: keep the 20 transformed features that score
# highest under f_classif.
selector = SelectKBest(score_func=f_classif, k=20)

# Logistic regression with balanced class weights and a fixed random seed.
classifier = LogisticRegression(random_state=42, class_weight="balanced")

# Chain preprocessing, selection and the model, then fit them in one pass.
pipeline = Pipeline(
    steps=[
        ("transformer", transformer),
        ("selector", selector),
        ("classifier", classifier),
    ]
)
pipeline.fit(X, y)

# Keep a joblib-flavoured Pickle copy of the fitted pipeline as a backup.
joblib.dump(pipeline, "Pipeline.pkl")

# Export to PMML

In [None]:
from sklearn2pmml import sklearn2pmml

# Convert the fitted pipeline to PMML and write the result to "Pipeline.pmml"
# in the current working directory.
# NOTE(review): sklearn2pmml delegates the conversion to a Java application, so
# a Java runtime presumably has to be available on the machine — confirm.
sklearn2pmml(pipeline, "Pipeline.pmml")