In [1]:
import pandas as pd
# Raw used-car listings. Each model label is repeated once per listing so the
# label column has the same 13 entries as the numeric columns below.
car_models = (
    ["BMW X5"] * 5
    + ["Audi A5"] * 4
    + ["Mercedez Benz C class"] * 4
)
data = {
    "Car Model": car_models,
    "Mileage": [
        69000, 35000, 57000, 22500, 46000,
        59000, 52000, 72000, 91000,
        67000, 83000, 79000, 59000,
    ],
    "Sell Price($)": [
        18000, 34000, 26100, 40000, 31500,
        29400, 32000, 19300, 12000,
        22000, 20000, 21000, 33000,
    ],
    "Age(yrs)": [
        6, 3, 5, 2, 4,
        5, 5, 7, 8,
        6, 7, 7, 5,
    ],
}
df = pd.DataFrame(data)

# One-hot encode "Car Model": the column is replaced by one integer 0/1
# indicator per distinct model, each named "Model_<value>".
df_ohe = pd.get_dummies(
    df,
    prefix="Model",
    columns=["Car Model"],
    dtype=int,
)
print(df_ohe)

    Mileage  Sell Price($)  Age(yrs)  Model_Audi A5  Model_BMW X5  \
0     69000          18000         6              0             1   
1     35000          34000         3              0             1   
2     57000          26100         5              0             1   
3     22500          40000         2              0             1   
4     46000          31500         4              0             1   
5     59000          29400         5              1             0   
6     52000          32000         5              1             0   
7     72000          19300         7              1             0   
8     91000          12000         8              1             0   
9     67000          22000         6              0             0   
10    83000          20000         7              0             0   
11    79000          21000         7              0             0   
12    59000          33000         5              0             0   

    Model_Mercedez Benz C class  

In [3]:
# Same encoding of "Car Model", but drop_first=True omits the indicator for
# the first category, leaving k-1 dummy columns for k distinct models.
df_ohe1 = pd.get_dummies(
    df,
    prefix="Model",
    columns=["Car Model"],
    dtype=int,
    drop_first=True,
)
print(df_ohe1)

    Mileage  Sell Price($)  Age(yrs)  Model_BMW X5  \
0     69000          18000         6             1   
1     35000          34000         3             1   
2     57000          26100         5             1   
3     22500          40000         2             1   
4     46000          31500         4             1   
5     59000          29400         5             0   
6     52000          32000         5             0   
7     72000          19300         7             0   
8     91000          12000         8             0   
9     67000          22000         6             0   
10    83000          20000         7             0   
11    79000          21000         7             0   
12    59000          33000         5             0   

    Model_Mercedez Benz C class  
0                             0  
1                             0  
2                             0  
3                             0  
4                             0  
5                             0  
6    

In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
# Used-car listings: three models, 13 rows in total. The label list is built
# by repetition so it stays aligned with the numeric columns.
model_labels = (
    ["BMW X5"] * 5
    + ["Audi A5"] * 4
    + ["Mercedez Benz C class"] * 4
)
data = {
    "Car Model": model_labels,
    "Mileage": [
        69000, 35000, 57000, 22500, 46000,
        59000, 52000, 72000, 91000,
        67000, 83000, 79000, 59000,
    ],
    "Sell Price($)": [
        18000, 34000, 26100, 40000, 31500,
        29400, 32000, 19300, 12000,
        22000, 20000, 21000, 33000,
    ],
    "Age(yrs)": [
        6, 3, 5, 2, 4,
        5, 5, 7, 8,
        6, 7, 7, 5,
    ],
}
df = pd.DataFrame(data)

# Column roles: the categorical column is encoded, every remaining column is
# treated as numeric and kept in its original frame order.
cat_cols = ["Car Model"]
num_cols = [name for name in df.columns if name not in cat_cols]
def ct_to_dataframe(ct, X, num_cols, index=None, ohe_name="ohe"):
    """Wrap a ColumnTransformer's array output in a labelled DataFrame.

    The helper depends only on its arguments; it no longer reads the
    notebook-level ``cat_cols`` and ``df`` variables, so it can be reused with
    any fitted transformer and any input frame.

    Parameters
    ----------
    ct : fitted ColumnTransformer
        Must contain a transformer registered under ``ohe_name`` that exposes
        ``get_feature_names_out()`` (e.g. a OneHotEncoder).
    X : array-like of shape (n_rows, n_encoded + len(num_cols))
        Output of ``ct.fit_transform`` / ``ct.transform``: encoded columns
        first, followed by the passthrough columns.
    num_cols : iterable of str
        Names of the passthrough columns, in the order they appear in ``X``.
        The caller is responsible for that order matching the transformer's
        remainder output.
    index : array-like, optional
        Row labels for the result. Defaults to a fresh 0..n-1 range; pass the
        source frame's index (``index=df.index``) to keep its labels.
    ohe_name : str, default "ohe"
        Key of the encoder inside ``ct.named_transformers_``.

    Returns
    -------
    pandas.DataFrame
        ``X`` with encoder feature names followed by ``num_cols`` as columns.

    Raises
    ------
    KeyError
        If ``ohe_name`` is not a transformer of ``ct``.
    ValueError
        Raised by pandas if the number of names does not match ``X``.
    """
    encoder = ct.named_transformers_[ohe_name]
    # With no argument the encoder reports names based on the columns it was
    # fitted on, so no external list of categorical columns is required.
    ohe_names = encoder.get_feature_names_out()
    # list(...) accepts any iterable (tuple, pandas Index, ...) for num_cols.
    cols = list(ohe_names) + list(num_cols)
    return pd.DataFrame(X, columns=cols, index=index)
ct_all = ColumnTransformer(
    transformers=[
        ("ohe", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols)
    ],
    remainder="passthrough",  
)
X_all = ct_all.fit_transform(df)
df_all = ct_to_dataframe(ct_all, X_all, num_cols)
print("Strict one-hot (all categories kept):")
print(df_all.head())
ct_drop = ColumnTransformer(
    transformers=[
        ("ohe", OneHotEncoder(drop="first", handle_unknown="ignore", sparse_output=False), cat_cols)
    ],
    remainder="passthrough",
)
X_drop = ct_drop.fit_transform(df)
df_drop = ct_to_dataframe(ct_drop, X_drop, num_cols)
print("\nDrop-first (k-1 dummies):")
print(df_drop.head())

Strict one-hot (all categories kept):
   Car Model_Audi A5  Car Model_BMW X5  Car Model_Mercedez Benz C class  \
0                0.0               1.0                              0.0   
1                0.0               1.0                              0.0   
2                0.0               1.0                              0.0   
3                0.0               1.0                              0.0   
4                0.0               1.0                              0.0   

   Mileage  Sell Price($)  Age(yrs)  
0  69000.0        18000.0       6.0  
1  35000.0        34000.0       3.0  
2  57000.0        26100.0       5.0  
3  22500.0        40000.0       2.0  
4  46000.0        31500.0       4.0  

Drop-first (k-1 dummies):
   Car Model_BMW X5  Car Model_Mercedez Benz C class  Mileage  Sell Price($)  \
0               1.0                              0.0  69000.0        18000.0   
1               1.0                              0.0  35000.0        34000.0   
2               