In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer


In [2]:
# Toy five-row dataset used to exercise the preprocessing pipeline.
# 'age' and 'salary' each contain one np.nan so the imputers have work to do;
# 'city' is the only string-valued column.
data = dict(
    age=[25, 30, np.nan, 35, 40],
    salary=[50000, 60000, 70000, np.nan, 90000],
    city=['New York', 'London', 'Paris', 'Tokyo', 'New York'],
    experience=[2, 5, 8, 12, 15],
)

# Column order follows the insertion order of the dict keys above.
df = pd.DataFrame(data=data)

In [3]:
# Column groups: each list names the columns handed to one preprocessing branch.
numeric_features = [
    "age",
    "salary",
    "experience",
]
categorical_features = [
    "city",
]

In [4]:
# Numeric branch: fill missing values with the column median, then pass the
# result through StandardScaler (default settings).
numeric_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])

In [5]:
# Categorical branch: replace missing entries with the literal string
# 'missing', then one-hot encode.
categorical_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="constant", fill_value="missing")),
    # handle_unknown='ignore': a category not seen during fit does not raise
    # at transform time.
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

In [6]:
# Route each column group to its own sub-pipeline; entries are
# (name, transformer, columns) and the outputs are concatenated in this order.
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features),
])

In [7]:
# Top-level pipeline; currently holds only the preprocessing step.
pipeline = Pipeline([("preprocessor", preprocessor)])

In [8]:
# Fit the pipeline on df and transform the same rows in a single call.
processed_data = pipeline.fit_transform(df)

# Header line followed by the transformed matrix, each on its own line.
print("Processed data :", processed_data, sep="\n")

Processed data :
[[-1.5        -1.28142321 -1.36947358  0.          1.          0.
   0.        ]
 [-0.5        -0.52764485 -0.72753284  1.          0.          0.
   0.        ]
 [ 0.          0.22613351 -0.0855921   0.          0.          1.
   0.        ]
 [ 0.5        -0.15075567  0.77032889  0.          0.          0.
   1.        ]
 [ 1.5         1.73369023  1.41226963  0.          1.          0.
   0.        ]]
