### import libraries

In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder,OneHotEncoder
import numpy as np

### import dataset

In [2]:
# Read the CSV (path is relative to the notebook's working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer="columntransformer.csv")
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


### Checking null values

In [5]:
# Count missing values per column; in the saved output only `fever` has any (10 nulls).
df.isna().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

### train_test_split

In [9]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    df.drop(columns=["has_covid"]),  # features: every column except the target
    df["has_covid"],                 # target column
    test_size=0.3,
    random_state=3,
)

### Replace null values with the mean using SimpleImputer

In [10]:
# "mean" is SimpleImputer's default strategy; it is spelled out so the cell matches its heading.
si = SimpleImputer(strategy="mean")

In [11]:
# Learn the fill value from the training rows only, so nothing leaks from the test set.
si.fit(x_train[["fever"]])

# Apply the same fitted imputer to both splits (double brackets keep the input 2-D).
x_train_fever = si.transform(x_train[["fever"]])
x_test_fever = si.transform(x_test[["fever"]])

In [20]:
# Inspect the imputed training `fever` values (a single-column 2-D array).
# In the saved output, previously-missing rows show the fitted fill value 100.75409836.
x_train_fever

array([[101.        ],
       [100.75409836],
       [102.        ],
       [103.        ],
       [100.        ],
       [ 98.        ],
       [ 98.        ],
       [103.        ],
       [101.        ],
       [102.        ],
       [103.        ],
       [102.        ],
       [102.        ],
       [101.        ],
       [102.        ],
       [ 99.        ],
       [100.        ],
       [100.        ],
       [104.        ],
       [101.        ],
       [ 98.        ],
       [101.        ],
       [ 99.        ],
       [ 98.        ],
       [ 98.        ],
       [100.75409836],
       [ 99.        ],
       [104.        ],
       [ 99.        ],
       [101.        ],
       [104.        ],
       [100.        ],
       [ 98.        ],
       [103.        ],
       [104.        ],
       [101.        ],
       [100.        ],
       [ 99.        ],
       [ 98.        ],
       [ 99.        ],
       [100.75409836],
       [104.        ],
       [ 98.        ],
       [100

### Ordinal encoding of the cough variable

In [15]:
# The explicit category order fixes the codes: "Mild" -> 0.0, "Strong" -> 1.0.
oe = OrdinalEncoder(
    categories=[["Mild", "Strong"]],
    handle_unknown="error",  # library default, made explicit: unseen values raise
)

In [16]:
# Fit on the training split only, then encode both splits with the same mapping.
oe.fit(x_train[["cough"]])

x_train_cough = oe.transform(x_train[["cough"]])
x_test_cough = oe.transform(x_test[["cough"]])

In [21]:
# Inspect the encoded training `cough` column: 0.0 = "Mild", 1.0 = "Strong"
# (the category order passed to OrdinalEncoder).
x_train_cough

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.]])

### Apply OneHotEncoder to the gender and city variables

In [31]:
ohe = OneHotEncoder(
    handle_unknown="ignore",  # categories unseen during fit encode as all zeros instead of raising
    sparse_output=False,      # return a dense ndarray rather than a sparse matrix
)

In [32]:
# Learn the category sets from the training split only.
ohe.fit(x_train[["gender","city"]])

# Encode both splits with the fitted encoder; output columns follow the input order (gender, then city).
x_train_city_gender = ohe.transform(x_train[["gender","city"]])
x_test_city_gender = ohe.transform(x_test[["gender","city"]])

In [35]:
# Inspect the one-hot encoded gender/city training matrix (six 0/1 columns in the saved output).
# Presumably the first two columns are gender and the last four are city -- confirm with ohe.categories_.
x_train_city_gender

array([[0., 1., 0., 1., 0., 0.],
       [1., 0., 1., 0., 0., 0.],
       [0., 1., 1., 0., 0., 0.],
       [0., 1., 0., 0., 1., 0.],
       [1., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0., 1.],
       [1., 0., 1., 0., 0., 0.],
       [1., 0., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 1., 0.],
       [1., 0., 0., 1., 0., 0.],
       [1., 0., 1., 0., 0., 0.],
       [1., 0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 1., 0.],
       [1., 0., 0., 1., 0., 0.],
       [0., 1., 1., 0., 0., 0.],
       [0., 1., 1., 0., 0., 0.],
       [1., 0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0., 1.],
       [0., 1., 0., 1., 0., 0.],
       [1., 0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 1.],
       [1., 0., 1., 0., 0., 0.],
       [0., 1., 0., 1., 0., 0.],
       [1., 0., 1., 0., 0., 0.],
       [1., 0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0., 1.],
       [0.

### Separate the age column

In [36]:
# Drop every column that was imputed or encoded above; whatever remains (here just `age`)
# is carried through unchanged as a DataFrame.
x_train_age = x_train.drop(labels=["fever","cough","gender","city"], axis="columns")
x_test_age = x_test.drop(labels=["fever","cough","gender","city"], axis="columns")

In [38]:
# Inspect what is left after the drop: only `age`, still a DataFrame keeping the original row index.
x_train_age

Unnamed: 0,age
30,15
5,84
13,64
31,83
88,5
...,...
0,60
72,83
56,71
3,31


### Concatenate all columns

In [44]:
# Stack the processed pieces side by side (column-wise).
# Column order: fever, one-hot gender/city, cough, age.
x_train_final = np.hstack(
    [x_train_fever, x_train_city_gender, x_train_cough, x_train_age]
)
x_test_final = np.hstack(
    [x_test_fever, x_test_city_gender, x_test_cough, x_test_age]
)

In [45]:
# Sanity check: (rows, columns) of the manually assembled training matrix.
x_train_final.shape

(70, 9)

### Doing all of these steps with ColumnTransformer

In [53]:
# One object replaces the manual impute / encode / concatenate steps above.
# Each entry is (name, transformer, columns); columns not listed are passed through untouched.
transformer = ColumnTransformer(
    transformers=[
        ("tf1", SimpleImputer(strategy="mean"), ["fever"]),
        ("tf2", OrdinalEncoder(categories=[["Mild", "Strong"]]), ["cough"]),
        ("tf3", OneHotEncoder(handle_unknown="ignore", sparse_output=False), ["gender", "city"]),
    ],
    remainder="passthrough",
)

In [54]:
# Fit all sub-transformers on the training split and transform it in one call.
x_train_finally = transformer.fit_transform(x_train)
# Reuse the already-fitted transformers on the test split (no refitting).
x_test_finally = transformer.transform(x_test)

In [55]:
# Sanity check: the shape should match the manually built x_train_final.
x_train_finally.shape

(70, 9)