In [17]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import FeatureUnion, Pipeline
from enum import Enum


# Built with Enum's functional API: members are numbered from 1 in the order
# they are listed, so CATEGORICAL has value 1 and NUMERICAL has value 2.
Mode = Enum("Mode", ["CATEGORICAL", "NUMERICAL"])
Mode.__doc__ = "Kind of feature columns a transformer should operate on."


class FeatureSetDecider(BaseEstimator, TransformerMixin):
    """Stateless transformer that keeps only the columns matching ``mode``.

    The columns are chosen by dtype with ``DataFrame.select_dtypes``; the
    dtypes that belong to each ``Mode`` are listed in ``_supported_dtypes``.

    Parameters
    ----------
    mode : Mode
        Which group of columns ``transform`` should return.
    """

    # Column dtypes selected for each mode.
    # The builtin ``object`` is used instead of ``np.object``: that alias was
    # deprecated in NumPy 1.20 and removed in 1.24, and it always referred to
    # the builtin, so the selected columns are the same.
    # NOTE(review): only int64 columns are treated as numerical, so float
    # columns (and other integer widths) are dropped -- confirm this is wanted.
    _supported_dtypes = {
        Mode.CATEGORICAL: [object],
        Mode.NUMERICAL: [np.int64],
    }

    def __init__(self, mode: Mode):
        # Stored unmodified under the parameter's own name so that
        # BaseEstimator.get_params / set_params can find it.
        self.mode = mode

    def fit(self, x: pd.DataFrame, y: pd.DataFrame = None):
        """Do nothing and return ``self``; there is no state to learn.

        ``x`` and ``y`` are accepted only to satisfy the fit/transform
        protocol and are ignored.
        """
        return self

    def transform(self, x: pd.DataFrame, y: pd.DataFrame = None):
        """Return the columns of ``x`` whose dtype is listed for ``self.mode``.

        ``y`` is ignored.

        Raises
        ------
        KeyError
            If ``self.mode`` has no entry in ``_supported_dtypes``.
        """
        return x.select_dtypes(include=self._supported_dtypes[self.mode])

# scikit-learn 1.2 renamed OneHotEncoder's ``sparse`` argument to
# ``sparse_output`` and 1.4 removed the old spelling. Try the current keyword
# first and fall back to the legacy one, so a dense array is requested on
# both old and new releases.
try:
    _dense_one_hot = OneHotEncoder(sparse_output=False)
except TypeError:  # scikit-learn < 1.2 does not know ``sparse_output``
    _dense_one_hot = OneHotEncoder(sparse=False)

# Two parallel branches whose outputs are concatenated column-wise by
# FeatureUnion: one-hot encoded categorical columns first, then the
# standardised numerical columns.
pipeline = FeatureUnion([
    ("categorical", Pipeline([
        ("FeatureSetDecider", FeatureSetDecider(Mode.CATEGORICAL)),
        ("OneHotEncoder", _dense_one_hot),
    ])),
    ("numerical", Pipeline([
        ("FeatureSetDecider", FeatureSetDecider(Mode.NUMERICAL)),
        ("StandardScaler", StandardScaler()),
    ])),
])

# Small demo frame: two string columns (A, B) and one integer column (C).
X_train = pd.DataFrame({'A': ['a1', 'a3', 'a2'],
                        'B': ['b2', 'b1', 'b3'],
                        'C': [1, 2, 3]})
X_tx = pipeline.fit_transform(X_train)


In [18]:
X_tx

array([[ 1.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        , -1.22474487],
       [ 0.        ,  0.        ,  1.        ,  1.        ,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  1.        ,  0.        ,  0.        ,  0.        ,
         1.        ,  1.22474487]])