In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("D:\\DataSet\\covid_toy.csv")
df.head()

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No
3,31,Female,98.0,Mild,Kolkata,No
4,65,Female,101.0,Mild,Mumbai,No


In [3]:
df.isnull().sum()

age           0
gender        0
fever        10
cough         0
city          0
has_covid     0
dtype: int64

In [4]:
from sklearn.impute import SimpleImputer

In [5]:
si = SimpleImputer()

In [6]:
df["fever"] = si.fit_transform(df[["fever"]])

In [7]:
df.isnull().sum()

age          0
gender       0
fever        0
cough        0
city         0
has_covid    0
dtype: int64

# LabelEncoding

In [8]:
df1 = df.copy()
df1.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No


In [9]:
from sklearn.preprocessing import LabelEncoder

In [10]:
le = LabelEncoder()

In [11]:
df1["gender"] = le.fit_transform(df1["gender"])
df1["cough"] = le.fit_transform(df1["cough"])
df1["city"] = le.fit_transform(df1["city"])
df1["has_covid"] = le.fit_transform(df1["has_covid"])

In [12]:
df1.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,1,103.0,0,2,0
1,27,1,100.0,0,1,1
2,42,1,101.0,0,1,0


# OrdinalEncoding

In [13]:
df2 = df.copy()
df2.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No


In [14]:
df2 = df2.drop(columns = ["age", "fever"])
df2.head(3)

Unnamed: 0,gender,cough,city,has_covid
0,Male,Mild,Kolkata,No
1,Male,Mild,Delhi,Yes
2,Male,Mild,Delhi,No


In [15]:
from sklearn.preprocessing import OrdinalEncoder

In [16]:
oe = OrdinalEncoder(categories = [
    ["Male","Female"], 
    ["Mild","Strong"], 
    ["Kolkata","Bangalore","Delhi","Mumbai"], 
    ["No","Yes"]
])

In [17]:
df2_new = oe.fit_transform(df2)

In [18]:
df2_new_df = pd.DataFrame(df2_new, columns = df2.columns)
df2_new_df.sample(5)

Unnamed: 0,gender,cough,city,has_covid
97,1.0,0.0,1.0,0.0
51,1.0,1.0,0.0,1.0
93,0.0,0.0,0.0,1.0
94,0.0,1.0,0.0,1.0
38,1.0,0.0,2.0,1.0


# OneHotEncoding

In [19]:
df3 = df.copy()
df3.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No


In [20]:
from sklearn.preprocessing import OneHotEncoder

In [21]:
ohe = OneHotEncoder(drop = "first", sparse_output = False, dtype=np.int32)

In [22]:
df_new = ohe.fit_transform(df[["gender", "cough", "city", "has_covid"]])

In [23]:
df_new

array([[1, 0, 0, 1, 0, 0],
       [1, 0, 1, 0, 0, 1],
       [1, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1],
       [1, 1, 0, 0, 0, 0],
       [0, 1, 0, 0, 1, 1],
       [0, 1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 1],
       [0, 1, 0, 1, 0, 0],
       [1, 0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0, 0],
       [1, 1, 0, 1, 0, 1],
       [0, 0, 0, 1, 0, 1],
       [0, 1, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0, 1],
       [1, 1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 1],
       [0, 1, 0, 1, 0, 1],
       [0, 0, 1, 0, 0, 1],
       [0, 1, 0, 1, 0, 0],
       [1, 0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 1],
       [0, 1, 1, 0, 0, 0],
       [1, 0, 0, 1, 0, 0],
       [0, 1, 0, 0, 1, 1],
       [1, 0, 1, 0, 0, 1],
       [1, 0, 0, 1, 0, 0],
       [0, 1, 1, 0, 0, 1],
       [0, 0, 0, 1, 0, 0],
       [1, 0, 0, 0, 1, 1],
       [0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
 

# get_dummies

In [24]:
df4 = df.copy()
df4.head(3)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes
2,42,Male,101.0,Mild,Delhi,No


In [25]:
x = pd.get_dummies(df4, columns = ["gender", "cough", "city", "has_covid"], drop_first=True)
x = x.astype(int)
x

Unnamed: 0,age,fever,gender_Male,cough_Strong,city_Delhi,city_Kolkata,city_Mumbai,has_covid_Yes
0,60,103,1,0,0,1,0,0
1,27,100,1,0,1,0,0,1
2,42,101,1,0,1,0,0,0
3,31,98,0,0,0,1,0,0
4,65,101,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...
95,12,104,0,0,0,0,0,0
96,51,101,0,1,0,1,0,1
97,20,101,0,0,0,0,0,0
98,5,98,0,1,0,0,1,0
