In [58]:
# Encoding --- > Transform your categorical data into Numerical data .
# (1). Label -- > 1-1 column apply but we cannot set the order manually .
# (2). Ordinal -- > multi-column apply and we can set the order manually .
# (3). OneHotEncoder --- > has_covid[yes, no] -- > 2 column create -- > covid_yes, covid_no .
# (4). get_dummies --- > has_covid[yes, no] -- > 2 column create -- > column name with values

In [59]:
import numpy as np
import pandas as pd

In [60]:
df=pd.read_csv("/content/covid_toy.csv")

In [61]:
df.head(2)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes


In [62]:
df=df.dropna()

In [63]:
df=df.drop(columns=['city'])

In [64]:
## APPROACH-1. : LabelEncoder

In [65]:
from sklearn.preprocessing import LabelEncoder

In [66]:
lb=LabelEncoder()

In [67]:
df['gender']=lb.fit_transform(df['gender'])
df['cough']=lb.fit_transform(df['cough'])
df['has_covid']=lb.fit_transform(df['has_covid'])

In [68]:
df.head(2)

Unnamed: 0,age,gender,fever,cough,has_covid
0,60,1,103.0,0,0
1,27,1,100.0,0,1


In [69]:
## APPROACH-2: OrdinalEncoding

In [70]:
df=pd.read_csv("/content/covid_toy.csv")

In [71]:
df.head(2)

Unnamed: 0,age,gender,fever,cough,city,has_covid
0,60,Male,103.0,Mild,Kolkata,No
1,27,Male,100.0,Mild,Delhi,Yes


In [72]:
df=df.drop(columns=['city','age','fever'])

In [73]:
df=df.dropna()

In [74]:
df_new=df.copy()

In [75]:
from sklearn.preprocessing import OrdinalEncoder

In [76]:
oe=OrdinalEncoder(categories=[['Male','Female'],
                               ['Mild','Strong'],
                               ['No','Yes']])

In [77]:
oe_df=oe.fit_transform(df)

In [78]:
df_oe=pd.DataFrame(oe_df,columns=df_new.columns)

In [79]:
df_oe

Unnamed: 0,gender,cough,has_covid
0,0.0,0.0,0.0
1,0.0,0.0,1.0
2,0.0,0.0,0.0
3,1.0,0.0,0.0
4,1.0,0.0,0.0
...,...,...,...
95,1.0,0.0,0.0
96,1.0,1.0,1.0
97,1.0,0.0,0.0
98,1.0,1.0,0.0


In [80]:
## APPROACH 3: ONEHOTENCODER

In [81]:
df=pd.read_csv("/content/covid_toy.csv")

In [82]:
from sklearn.preprocessing import OneHotEncoder

In [83]:
ohe=OneHotEncoder(drop='first',sparse_output=False,dtype=np.int32)

In [84]:
ohe.fit_transform(df[['gender','cough','city','has_covid']])

array([[1, 0, 0, 1, 0, 0],
       [1, 0, 1, 0, 0, 1],
       [1, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1],
       [1, 1, 0, 0, 0, 0],
       [0, 1, 0, 0, 1, 1],
       [0, 1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 1],
       [0, 1, 0, 1, 0, 0],
       [1, 0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0, 0],
       [1, 1, 0, 1, 0, 1],
       [0, 0, 0, 1, 0, 1],
       [0, 1, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0, 1],
       [1, 1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 1],
       [0, 1, 0, 1, 0, 1],
       [0, 0, 1, 0, 0, 1],
       [0, 1, 0, 1, 0, 0],
       [1, 0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 1],
       [0, 1, 1, 0, 0, 0],
       [1, 0, 0, 1, 0, 0],
       [0, 1, 0, 0, 1, 1],
       [1, 0, 1, 0, 0, 1],
       [1, 0, 0, 1, 0, 0],
       [0, 1, 1, 0, 0, 1],
       [0, 0, 0, 1, 0, 0],
       [1, 0, 0, 0, 1, 1],
       [0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
 

In [85]:
## APPROACH: 4 GET_DUMMIES

In [86]:
df=pd.read_csv("/content/covid_toy.csv")

In [90]:
df=df.dropna()

In [91]:
df=pd.get_dummies(df,columns=['gender','cough','city','has_covid'],drop_first=True)

KeyError: "None of [Index(['gender', 'cough', 'city', 'has_covid'], dtype='object')] are in the [columns]"

In [92]:
df=df.astypes(int)

AttributeError: 'DataFrame' object has no attribute 'astypes'

In [93]:
df

Unnamed: 0,age,fever,gender_Male,cough_Strong,city_Delhi,city_Kolkata,city_Mumbai,has_covid_Yes
0,60,103.0,True,False,False,True,False,False
1,27,100.0,True,False,True,False,False,True
2,42,101.0,True,False,True,False,False,False
3,31,98.0,False,False,False,True,False,False
4,65,101.0,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...
95,12,104.0,False,False,False,False,False,False
96,51,101.0,False,True,False,True,False,True
97,20,101.0,False,False,False,False,False,False
98,5,98.0,False,True,False,False,True,False
