In [48]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder,LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

In [49]:
# Read the toy COVID dataset from a CSV file located in the working directory.
df = pd.read_csv('./covid_toy.csv')
# Preview the first rows; as the last expression it becomes the cell's output.
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [50]:
# (rows, columns) of the loaded frame.
df.shape

(100, 6)

In [51]:
# Number of missing values in each column (isna is the canonical spelling;
# isnull is its alias and returns the same result).
df.isna().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [52]:
# Feature matrix: every column except the target.
X = df.drop(columns="has_covid")

# Target: encode the string labels as integers.
# LabelEncoder.fit_transform returns a new array and leaves its input
# untouched, so the result must be captured -- calling it without assigning
# the return value would leave y as raw strings.
# Classes are sorted before being numbered, so with the 'No'/'Yes' values
# seen in the preview this yields No -> 0 and Yes -> 1;
# lb.inverse_transform maps encoded values back to the original labels.
lb = LabelEncoder()

# Kept as a one-column DataFrame that reuses df's index so it stays aligned
# with X when both are split.
y = pd.DataFrame(
    lb.fit_transform(df['has_covid']),
    index=df.index,
    columns=['has_covid'],
)

In [53]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [54]:
# Per-column preprocessing. Columns are selected by name rather than by
# position so the transformer keeps working if the column order of X changes.
ct = ColumnTransformer(
    transformers=[
        # gender -> one indicator column per category. Dense output is
        # requested explicitly (matching city_ohe) so the type of the combined
        # result does not depend on ColumnTransformer's sparse/dense heuristic.
        ('gender_ohe', OneHotEncoder(sparse_output=False), ['gender']),
        # fever contains missing values; SimpleImputer's default strategy
        # replaces them with the column mean learned during fit.
        ('fever_interpolation', SimpleImputer(), ['fever']),
        # city -> indicator columns, dropping the first category to avoid a
        # redundant column.
        ('city_ohe', OneHotEncoder(sparse_output=False, drop='first'), ['city']),
    ],
    # Columns not listed above (age, cough) are forwarded unchanged.
    # NOTE(review): cough therefore stays a raw string ('Mild'/'Strong') in
    # the transformed array -- confirm whether it should be encoded as well.
    remainder="passthrough"
)


In [56]:
# Fit the column transformer on the training split only and transform it.
X_train_trans = ct.fit_transform(X_train)
# Reuse the already-fitted transformer on the test split (transform, not
# fit_transform) so nothing is learned from the held-out rows.
X_test_trans = ct.transform(X_test)

array([[1.0, 0.0, 101.0, 0.0, 0.0, 1.0, 81, 'Mild'],
       [1.0, 0.0, 100.0, 0.0, 1.0, 0.0, 5, 'Mild'],
       [1.0, 0.0, 100.0, 0.0, 1.0, 0.0, 19, 'Mild'],
       [0.0, 1.0, 100.0, 1.0, 0.0, 0.0, 27, 'Mild'],
       [1.0, 0.0, 103.0, 1.0, 0.0, 0.0, 73, 'Mild'],
       [0.0, 1.0, 103.0, 0.0, 1.0, 0.0, 70, 'Strong'],
       [1.0, 0.0, 102.0, 1.0, 0.0, 0.0, 49, 'Mild'],
       [1.0, 0.0, 101.0, 0.0, 1.0, 0.0, 51, 'Strong'],
       [1.0, 0.0, 101.0, 1.0, 0.0, 0.0, 64, 'Mild'],
       [1.0, 0.0, 101.0, 0.0, 1.0, 0.0, 83, 'Mild'],
       [1.0, 0.0, 98.0, 0.0, 0.0, 1.0, 65, 'Mild'],
       [1.0, 0.0, 104.0, 0.0, 0.0, 0.0, 18, 'Mild'],
       [1.0, 0.0, 103.0, 0.0, 0.0, 0.0, 16, 'Mild'],
       [0.0, 1.0, 104.0, 0.0, 1.0, 0.0, 16, 'Mild'],
       [0.0, 1.0, 100.0, 0.0, 1.0, 0.0, 27, 'Mild'],
       [1.0, 0.0, 101.0, 0.0, 0.0, 0.0, 84, 'Mild'],
       [0.0, 1.0, 104.0, 0.0, 1.0, 0.0, 51, 'Mild'],
       [1.0, 0.0, 102.0, 0.0, 0.0, 0.0, 69, 'Mild'],
       [1.0, 0.0, 102.0, 0.0, 0.0, 0.0, 82, 