# Categorical Encoding Techniques

### 1. Target Encoding


Target encoding involves replacing each category with the mean of the target variable for that category.



In [1]:
# Import packages
import pandas as pd

In [3]:
# Toy dataset for target encoding: one (Category, Target) pair per row.
data = pd.DataFrame(
    [
        ('A', 1),
        ('B', 0),
        ('A', 1),
        ('C', 1),
        ('B', 0),
        ('A', 1),
        ('C', 1),
        ('B', 0),
    ],
    columns=['Category', 'Target'],
)
data

Unnamed: 0,Category,Target
0,A,1
1,B,0
2,A,1
3,C,1
4,B,0
5,A,1
6,C,1
7,B,0


In [4]:
# Mean of Target within each Category; the result is a Series indexed by category.
# Note: the means are computed over every row of `data`, so each encoded value
# includes that row's own target.
target_avg = data['Target'].groupby(data['Category']).mean()

In [5]:
# Look up each row's category in target_avg and store the matching mean
# in a new column next to the original one.
data.loc[:, 'Category_Target_Encoded'] = data['Category'].map(target_avg)

In [6]:
# Display the frame, which now includes the Category_Target_Encoded column.
data

Unnamed: 0,Category,Target,Category_Target_Encoded
0,A,1,1.0
1,B,0,0.0
2,A,1,1.0
3,C,1,1.0
4,B,0,0.0
5,A,1,1.0
6,C,1,1.0
7,B,0,0.0


### 2. Label Encoding


Label encoding converts categorical data into numeric labels.



In [7]:
# Import packages
from sklearn.preprocessing import LabelEncoder

In [8]:
# Fresh single-column dataset for the label-encoding example.
data = pd.Series(
    ['A', 'B', 'A', 'C', 'B', 'A', 'C', 'B'],
    name='Category',
).to_frame()
data

Unnamed: 0,Category
0,A
1,B
2,A
3,C
4,B
5,A
6,C
7,B


In [9]:
# Create the LabelEncoder; it is not fitted yet, so it holds no
# category-to-integer mapping until it is fitted on the data.
label_E = LabelEncoder()

In [10]:
# Learn the set of categories first, then translate every row into its
# integer label and store the result in a new column.
label_E.fit(data['Category'])
data['Category_Label_Encoded'] = label_E.transform(data['Category'])

In [11]:
# Display the frame, which now includes the Category_Label_Encoded column.
data

Unnamed: 0,Category,Category_Label_Encoded
0,A,0
1,B,1
2,A,0
3,C,2
4,B,1
5,A,0
6,C,2
7,B,1


### 3. One-Hot Encoding


One-hot encoding converts a categorical variable into binary columns, where each category is represented by its own column.



In [12]:
# Fresh single-column dataset for the one-hot encoding example.
data = pd.DataFrame(
    ['A', 'B', 'A', 'C', 'B', 'A', 'C', 'B'],
    columns=['Category'],
)


In [13]:
# Apply one-hot encoding using pandas: one indicator column per distinct
# category, named 'Category_<value>'.
# dtype=int keeps the indicators as 0/1 integers. Without it, pandas >= 2.0
# produces boolean True/False columns instead of the 0/1 values this
# notebook displays.
data_onehot = pd.get_dummies(data['Category'], prefix='Category', dtype=int)

In [14]:
# Concatenate the original data with the one-hot encoded columns.
# Because `data` is overwritten with the widened frame, indicator columns
# left over from a previous run of this cell are dropped first; re-running
# the cell therefore does not append duplicate Category_* columns.
data = pd.concat(
    [data.drop(columns=data_onehot.columns, errors='ignore'), data_onehot],
    axis=1,
)

In [15]:
# Display the original Category column alongside its one-hot indicator columns.
data

Unnamed: 0,Category,Category_A,Category_B,Category_C
0,A,1,0,0
1,B,0,1,0
2,A,1,0,0
3,C,0,0,1
4,B,0,1,0
5,A,1,0,0
6,C,0,0,1
7,B,0,1,0
