Need to get the feature names output by a ColumnTransformer?

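Use get_feature_names_out(), which works with "passthrough" columns! (Passthrough support was first added to the older get_feature_names() in version 0.23; get_feature_names_out() replaced that method starting in version 1.0.)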

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer

In [2]:
# read the training data and discard every row that contains a missing value
df = pd.read_csv('train.csv').dropna(axis=0, how='any')

In [3]:
# keep only the 4 feature columns used in this example
feature_cols = ['Embarked', 'Sex', 'Parch', 'Fare']
X = df[feature_cols]

In [4]:
# "Embarked" and "Sex" get one-hot encoded; the remaining columns
# ("Parch" and "Fare") are passed through as-is
encoded_cols = ['Embarked', 'Sex']
ct = make_column_transformer(
    (OneHotEncoder(), encoded_cols),
    remainder='passthrough',
)

In [5]:
# fit the ColumnTransformer and transform X; the result has 7 columns
X_transformed = ct.fit_transform(X)
X_transformed.shape

(183, 7)

In [8]:
# get the names of those 7 features; as the output below shows, each name is
# prefixed with the step that produced it ("onehotencoder__" or "remainder__")
ct.get_feature_names_out()

array(['onehotencoder__Embarked_C', 'onehotencoder__Embarked_Q',
       'onehotencoder__Embarked_S', 'onehotencoder__Sex_female',
       'onehotencoder__Sex_male', 'remainder__Parch', 'remainder__Fare'],
      dtype=object)