In [1]:
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.preprocessing import StandardScaler, OneHotEncoder, KBinsDiscretizer

# One seed shared by data generation, the split and the classifier so that a
# fresh "Restart & Run All" reproduces the same fitted model.
RANDOM_STATE = 42

# Generate sample classification data.
# NOTE: make_classification produces continuous float features only.
X_clf, y_clf = make_classification(n_samples=300, n_features=5, random_state=RANDOM_STATE)
X_clf_train, X_clf_test, y_clf_train, y_clf_test = train_test_split(
    X_clf, y_clf, test_size=0.2, random_state=RANDOM_STATE
)

# Define a preprocessing pipeline.
# Columns are addressed by position because X_clf is a plain NumPy array.
numeric_features = [0, 1, 2]
categorical_features = [3, 4]

numeric_transformer = Pipeline([
    ('scaler', StandardScaler())
])

# Columns 3 and 4 are continuous, so one-hot encoding them directly would
# create one category per distinct training value; with
# handle_unknown='ignore' nearly every unseen value would then encode to an
# all-zero row at predict time. Bin them into a small number of ordinal
# buckets first (bin edges are learned from the training data during fit)
# and one-hot encode the bucket ids.
categorical_transformer = Pipeline([
    ('binner', KBinsDiscretizer(n_bins=4, encode='ordinal', strategy='uniform')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# sparse_threshold=0 forces a dense output: the StandardScaler in the feature
# union below centres the data, which is not supported on sparse matrices.
preprocessor = ColumnTransformer(
    [
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ],
    sparse_threshold=0
)

# Feature union: concatenates the 3 best columns (ANOVA F-test) with a
# standardised copy of every preprocessed column.
feature_union = FeatureUnion([
    ('select_best', SelectKBest(score_func=f_classif, k=3)),
    ('scaler', StandardScaler())
])

# Full pipeline: preprocessing -> feature union -> classifier.
# The feature union was previously defined but never added to the pipeline.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('features', feature_union),
    ('classifier', RandomForestClassifier(random_state=RANDOM_STATE))
])

# Train pipeline
pipeline.fit(X_clf_train, y_clf_train)
print("Pipeline successfully trained and automated feature processing included.")


Pipeline successfully trained and automated feature processing included.
