In [4]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

def build_sklearn_pipeline(X: pd.DataFrame) -> Pipeline:
    """Build an unfitted preprocessing + random-forest regression pipeline.

    The columns of ``X`` are partitioned by dtype:

    * object, categorical and pandas string-dtype columns are one-hot
      encoded (categories unseen during fit are ignored at transform time);
    * every other column is scaled to unit variance without centering.

    Only the column labels and dtypes of ``X`` are inspected; no data is
    read and nothing is fitted here.

    Args:
        X: Feature frame whose column dtypes decide the preprocessing.

    Returns:
        A ``Pipeline`` with steps ``"pre"`` (``ColumnTransformer``) and
        ``"rf"`` (``RandomForestRegressor``).
    """
    # Partition in a single pass over the dtypes. Going through `X.dtypes`
    # (rather than `X[c].dtype`) also copes with duplicated column labels,
    # where `X[c]` would be a DataFrame without a `.dtype` attribute.
    cat_cols = []
    num_cols = []
    for col, dtype in X.dtypes.items():
        # Checking `dtype.name` against ("object", "category") alone misses
        # the pandas string extension dtype, which would then be sent to
        # StandardScaler and fail at fit time.
        is_categorical = (
            isinstance(dtype, pd.CategoricalDtype)
            or pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype)
        )
        if is_categorical:
            cat_cols.append(col)
        else:
            num_cols.append(col)

    try:
        # scikit-learn >= 1.2 (`sparse` was deprecated there, removed in 1.4)
        ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=True)
    except TypeError:
        # scikit-learn < 1.2 only knows the old `sparse` keyword
        ohe = OneHotEncoder(handle_unknown="ignore", sparse=True)

    # Register a transformer only for a non-empty column group.
    transformers = []
    if num_cols:
        # with_mean=False: scale only, do not center
        transformers.append(("num", StandardScaler(with_mean=False), num_cols))
    if cat_cols:
        transformers.append(("cat", ohe, cat_cols))

    pre = ColumnTransformer(
        transformers=transformers,
        remainder="drop",
        # stacked output stays sparse only if its overall density is below this
        sparse_threshold=0.3,
    )

    rf = RandomForestRegressor(n_estimators=300, random_state=42, n_jobs=-1)
    return Pipeline([("pre", pre), ("rf", rf)])
