In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer

In [2]:
# Seed NumPy's legacy global RNG so the synthetic dataset below is identical on
# every Restart & Run All. np.random.seed must be *called*: assigning to it
# would replace the function with an int and leave the generator unseeded.
np.random.seed(42)

# Synthetic 100-row patient table; every column is drawn from the global RNG.
data = {
    # Integer ages drawn uniformly from [0, 100).
    "age": np.random.randint(0, 100, 100),
    "gender": np.random.choice(["male", "female"], size=100),
    # Temperatures are sampled from a pool of 10 uniform draws in [95, 105)
    # plus one NaN entry, so some rows may carry a missing fever value.
    "fever": np.random.choice([*np.random.uniform(95, 105, 10), np.nan], size=100),
    "cough": np.random.choice(["strong", "mild"], size=100),
    "city": np.random.choice(["a", "b", "c", "d", "e"], size=100),
    # Binary label column.
    "covid": np.random.choice(["yes", "no"], size=100)
}

In [3]:
# Materialize the dict of equal-length column arrays as a DataFrame
# (one column per dict key, in insertion order).
df = pd.DataFrame(data=data)

In [4]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1],  # features: every column except the last
    df.iloc[:, -1],   # target: the last column
    test_size=0.2,
    random_state=42,
)

In [5]:
# Per-column preprocessing. Columns not named in any step are passed through
# unchanged (remainder="passthrough").
transformer = ColumnTransformer(
    transformers=[
        # Fill missing fever readings with SimpleImputer's default strategy.
        ("tnf1", SimpleImputer(), ["fever"]),
        # Encode cough with an explicit category order: "mild" before "strong".
        ("tnf2", OrdinalEncoder(categories=[["mild", "strong"]]), ["cough"]),
        # One-hot encode the nominal columns as a dense array, dropping the
        # first level of each feature.
        ("tnf3", OneHotEncoder(sparse_output=False, drop="first"), ["gender", "city"]),
    ],
    remainder="passthrough",
)

# Fit on the training split only, then apply the already-fitted transformer to
# the test split so no information from the test rows is learned.
x_train_new = transformer.fit_transform(x_train)
x_test_new = transformer.transform(x_test)

In [6]:
# Show the training feature shape before and after preprocessing, one per line.
print(x_train.shape, x_train_new.shape, sep="\n")

(80, 5)
(80, 8)
