# Personal Highlights of Scikit-Learn 1.0

## 1. Feature Names

In [1]:
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import pandas as pd

# Toy dataset: a categorical feature ("pet"), a numeric feature ("age"),
# a junk column ("noise") that no transformer selects, and the binary label.
df = pd.DataFrame(
    data={
        "pet": ["dog", "cat", "fish"],
        "age": [3, 7, 1],
        "noise": [-99, pd.NA, 1e-10],
        "target": [1, 0, 1],
    }
)

# X is the very same object as df; popping the label removes it in place,
# so afterwards X/df hold only the feature columns and y holds the target.
X = df
y = X.pop("target")

# Columns are selected by name. Anything not listed (here: "noise") is
# discarded via remainder="drop", and verbose_feature_names_out=False keeps
# the output names free of the "<transformer>__" prefix.
preprocessor = ColumnTransformer(
    transformers=[
        ("numerical", StandardScaler(), ["age"]),
        ("categorical", OneHotEncoder(), ["pet"]),
    ],
    remainder="drop",
    verbose_feature_names_out=False,
)

# fit() returns the fitted pipeline itself, so build-and-fit is one statement.
pipe = make_pipeline(preprocessor, LogisticRegression()).fit(X, y)

# Slice off the final estimator: only the transformer steps can report the
# names of the features they emit.
pipe[:-1].get_feature_names_out()

array(['age', 'pet_cat', 'pet_dog', 'pet_fish'], dtype=object)

## 2. ColumnTransformer allows a changed column order

In [2]:
# New data whose columns are laid out differently from the training frame:
# "age" now precedes "pet", and the unused column is named "another_noise".
df_new = pd.DataFrame(
    data={
        "age": [1, 9, 3],
        "another_noise": [pd.NA, -99, 1e-10],
        "pet": ["cat", "dog", "fish"],
    }
)

# Predict with the pipeline fitted in the previous cell.
pipe.predict(df_new)

array([1, 0, 1])