# Data Encoding
1.Nominal/OneHotEncoding
2.Label and Ordinal Encoding
3.Target Guided Ordinal Encoding

## OneHotEncoding

In [18]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder


In [19]:
##Create a simple DataFrame
df=pd.DataFrame({
    'color':['red','green','blue','green','yellow','Orange']
})
df.head()

Unnamed: 0,color
0,red
1,green
2,blue
3,green
4,yellow


In [20]:
## Creating an instance of OneHotEncoder
encoder=OneHotEncoder()

In [21]:
encode=encoder.fit_transform(df[['color']]).toarray()

In [22]:
en_df=pd.DataFrame(encode,columns=encoder.get_feature_names_out())
en_df

Unnamed: 0,color_Orange,color_blue,color_green,color_red,color_yellow
0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0,0.0
2,0.0,1.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0
5,1.0,0.0,0.0,0.0,0.0


In [23]:
encoder.transform([['blue']]).toarray()



array([[0., 1., 0., 0., 0.]])

In [24]:
pd.concat([df,en_df],axis=1)

Unnamed: 0,color,color_Orange,color_blue,color_green,color_red,color_yellow
0,red,0.0,0.0,0.0,1.0,0.0
1,green,0.0,0.0,1.0,0.0,0.0
2,blue,0.0,1.0,0.0,0.0,0.0
3,green,0.0,0.0,1.0,0.0,0.0
4,yellow,0.0,0.0,0.0,0.0,1.0
5,Orange,1.0,0.0,0.0,0.0,0.0


In [25]:
import seaborn as  sns
df1=sns.load_dataset('tips')
df1.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [26]:
encoder=OneHotEncoder()

encode=encoder.fit_transform(df[['sex']]).toarray()
en_df=pd.DataFrame(encode,columns=encoder.get_feature_names_out())
en_df

KeyError: "None of [Index(['sex'], dtype='object')] are in the [columns]"

## Label Encoding


In [28]:
df.head()

Unnamed: 0,color
0,red
1,green
2,blue
3,green
4,yellow


In [30]:
df.color.value_counts()

color
green     2
red       1
blue      1
yellow    1
Orange    1
Name: count, dtype: int64

In [29]:
from sklearn.preprocessing import LabelEncoder
lbl_encoder=LabelEncoder()
lbl_encoder.fit_transform(df[['color']])

  y = column_or_1d(y, warn=True)


array([3, 2, 1, 2, 4, 0])

In [31]:
### Ordinal Encoding

from sklearn.preprocessing import OrdinalEncoder
df=pd.DataFrame({
    'size':['small','large','medium','small','large']
})
df

Unnamed: 0,size
0,small
1,large
2,medium
3,small
4,large


In [32]:
encoder=OrdinalEncoder(categories=[['small','medium','large']])
encoder.fit_transform(df[['size']])

array([[0.],
       [2.],
       [1.],
       [0.],
       [2.]])

In [33]:
encoder.transform([['small']])



array([[0.]])

## Target Guided Ordinal Encoding

In [34]:
df=pd.DataFrame({
    'city':['new york','london','paris','Tokyo','new york','paris'],
    'price':[200,150,300,250,180,320]
})
df

Unnamed: 0,city,price
0,new york,200
1,london,150
2,paris,300
3,Tokyo,250
4,new york,180
5,paris,320


In [37]:
mean_price=df.groupby('city')['price'].mean().to_dict()
mean_price

{'Tokyo': 250.0, 'london': 150.0, 'new york': 190.0, 'paris': 310.0}

In [38]:
df['city_encode']=df['city'].map(mean_price)
df[['price','city_encode']]

Unnamed: 0,price,city_encode
0,200,190.0
1,150,150.0
2,300,310.0
3,250,250.0
4,180,190.0
5,320,310.0


In [39]:
import seaborn as sns
df=sns.load_dataset('tips')
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [42]:
mean_bill=df.groupby('time')['total_bill'].mean().to_dict()
mean_bill

  mean_bill=df.groupby('time')['total_bill'].mean().to_dict()


{'Lunch': 17.168676470588235, 'Dinner': 20.79715909090909}

In [44]:
df['mean_bill']=df['time'].map(mean_bill)

In [45]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,mean_bill
0,16.99,1.01,Female,No,Sun,Dinner,2,20.797159
1,10.34,1.66,Male,No,Sun,Dinner,3,20.797159
2,21.01,3.50,Male,No,Sun,Dinner,3,20.797159
3,23.68,3.31,Male,No,Sun,Dinner,2,20.797159
4,24.59,3.61,Female,No,Sun,Dinner,4,20.797159
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,20.797159
240,27.18,2.00,Female,Yes,Sat,Dinner,2,20.797159
241,22.67,2.00,Male,Yes,Sat,Dinner,2,20.797159
242,17.82,1.75,Male,No,Sat,Dinner,2,20.797159
