In [1]:
import pandas as pd

In [2]:
df = pd.DataFrame({
    'sales': [100000, 222000, 1000000, 522000, 111111, 222222, 111111, 20000, 75000, 90000, 1000000, 10000],
    'city': ['Tampa', 'Tampa', 'Orlando', 'Jacksonville', 'Miami',  'Jacksonville',  'Miami',  'Miami', 'Orlando', 'Orlando', 'Orlando', 'Orlando'],
    'size': ['Small', 'Medium', 'Large', 'Large', 'Small', 'Medium', 'Large', 'Small', 'Medium', 'Medium', 'Medium', 'Medium']
})

In [3]:
df

Unnamed: 0,sales,city,size
0,100000,Tampa,Small
1,222000,Tampa,Medium
2,1000000,Orlando,Large
3,522000,Jacksonville,Large
4,111111,Miami,Small
5,222222,Jacksonville,Medium
6,111111,Miami,Large
7,20000,Miami,Small
8,75000,Orlando,Medium
9,90000,Orlando,Medium


In [4]:
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

In [25]:
# Create OneHotEncoder for 'city' column (convert categorical values to binary columns)
ohe = OneHotEncoder(sparse_output = False)
ohe

In [27]:
# Create OrdinalEncoder for 'size' column (convert categorical values to numeric order)
ode = OrdinalEncoder()
ode

In [8]:
from sklearn.compose import make_column_transformer

In [28]:
# Import make_column_transformer to apply multiple transformations
ct = make_column_transformer(
    (ohe, ['city']),    # Apply OneHotEncoder to 'city'
    (ode, ['size']),    # Apply OrdinalEncoder to 'size'
    remainder='passthrough'  # Keep other columns (e.g., 'sales')
)

In [29]:
# Set the output to be a pandas DataFrame
ct.set_output(transform = "pandas")

In [30]:
# Apply transformations and save result in df_pandas
df_pandas = ct.fit_transform(df)

In [13]:
df_pandas

Unnamed: 0,onehotencoder__city_Jacksonville,onehotencoder__city_Miami,onehotencoder__city_Orlando,onehotencoder__city_Tampa,ordinalencoder__size,remainder__sales
0,0.0,0.0,0.0,1.0,2.0,100000
1,0.0,0.0,0.0,1.0,1.0,222000
2,0.0,0.0,1.0,0.0,0.0,1000000
3,1.0,0.0,0.0,0.0,0.0,522000
4,0.0,1.0,0.0,0.0,2.0,111111
5,1.0,0.0,0.0,0.0,1.0,222222
6,0.0,1.0,0.0,0.0,0.0,111111
7,0.0,1.0,0.0,0.0,2.0,20000
8,0.0,0.0,1.0,0.0,1.0,75000
9,0.0,0.0,1.0,0.0,1.0,90000


In [None]:
# DROP

In [31]:
# Apply transformations but drop other columns
ct2 = make_column_transformer(
    (ohe, [1]),
    (ode, [2]),
    remainder = 'drop'
)

In [16]:
ct2.set_output(transform = 'pandas')

In [18]:
df_pandas2 = ct2.fit_transform(df)

In [19]:
df_pandas2

Unnamed: 0,onehotencoder__city_Jacksonville,onehotencoder__city_Miami,onehotencoder__city_Orlando,onehotencoder__city_Tampa,ordinalencoder__size
0,0.0,0.0,0.0,1.0,2.0
1,0.0,0.0,0.0,1.0,1.0
2,0.0,0.0,1.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0
4,0.0,1.0,0.0,0.0,2.0
5,1.0,0.0,0.0,0.0,1.0
6,0.0,1.0,0.0,0.0,0.0
7,0.0,1.0,0.0,0.0,2.0
8,0.0,0.0,1.0,0.0,1.0
9,0.0,0.0,1.0,0.0,1.0


In [20]:
ct3 = make_column_transformer(
    (ohe, ['city']),
    ('passthrough', ['size']),
    remainder = 'drop'
)

In [21]:
ct3.set_output(transform = 'pandas')

In [22]:
df_pandas3 = ct3.fit_transform(df)

In [23]:
df_pandas3

Unnamed: 0,onehotencoder__city_Jacksonville,onehotencoder__city_Miami,onehotencoder__city_Orlando,onehotencoder__city_Tampa,passthrough__size
0,0.0,0.0,0.0,1.0,Small
1,0.0,0.0,0.0,1.0,Medium
2,0.0,0.0,1.0,0.0,Large
3,1.0,0.0,0.0,0.0,Large
4,0.0,1.0,0.0,0.0,Small
5,1.0,0.0,0.0,0.0,Medium
6,0.0,1.0,0.0,0.0,Large
7,0.0,1.0,0.0,0.0,Small
8,0.0,0.0,1.0,0.0,Medium
9,0.0,0.0,1.0,0.0,Medium


🎯 Simplified Explanation:

1. Import Libraries:

- pandas: For creating and manipulating data.

- OneHotEncoder: Converts categorical columns (city) to binary format.

- OrdinalEncoder: Converts categorical columns (size) to numeric order.

- make_column_transformer: Applies different transformations to different columns.

2. Create DataFrame:

- sales: Numeric column.

- city: Categorical column (Tampa, Orlando, etc.).

- size: Categorical column (Small, Medium, Large).

3. Encoders:

- ohe: OneHotEncoder for city.

- ode: OrdinalEncoder for size.

4. Column Transformer with passthrough (ct):

- Encodes city and size but keeps the sales column.

5. Column Transformer with drop (ct2):

- Encodes city and size but drops the sales column.