# One-Hot Encoding

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Toy dataset: a single categorical column holding three distinct sectors
data = pd.DataFrame(
    {'Sector': ['Tech', 'Healthcare', 'Finance', 'Tech', 'Finance']}
)

# sparse_output=False asks for a dense NumPy array rather than a SciPy sparse
# matrix (the keyword exists from scikit-learn 1.2; older releases call it `sparse`)
encoder = OneHotEncoder(sparse_output=False).fit(data[['Sector']])
encoded = encoder.transform(data[['Sector']])

# Wrap the array in a DataFrame with one indicator column per category
encoded_df = pd.DataFrame(
    data=encoded,
    columns=encoder.get_feature_names_out(),
)

print(encoded_df)


   Sector_Finance  Sector_Healthcare  Sector_Tech
0             0.0                0.0          1.0
1             0.0                1.0          0.0
2             1.0                0.0          0.0
3             0.0                0.0          1.0
4             1.0                0.0          0.0


# Label Encoding

In [2]:
from sklearn.preprocessing import LabelEncoder

# Learn the sector -> integer mapping first, then apply it; codes follow the
# sorted order of the class names (Finance=0, Healthcare=1, Tech=2).
# NOTE: scikit-learn documents LabelEncoder as meant for target labels;
# OrdinalEncoder is its counterpart for input features.
label_encoder = LabelEncoder().fit(data['Sector'])
data['Sector_Encoded'] = label_encoder.transform(data['Sector'])
print(data)

       Sector  Sector_Encoded
0        Tech               2
1  Healthcare               1
2     Finance               0
3        Tech               2
4     Finance               0


# Target Encoding

In [3]:
import numpy as np

# Numeric target used to derive the encoding
data['Stock_Returns'] = [0.05, 0.02, 0.07, 0.03, 0.06]

# Replace each sector by the mean return of all rows in that sector.
# NOTE: every row's own return is part of its group mean; on real data,
# compute the means on the training split only to avoid target leakage.
data['Sector_Target_Encoded'] = data['Sector'].map(
    data.groupby('Sector')['Stock_Returns'].mean()
)
print(data)

       Sector  Sector_Encoded  Stock_Returns  Sector_Target_Encoded
0        Tech               2           0.05                  0.040
1  Healthcare               1           0.02                  0.020
2     Finance               0           0.07                  0.065
3        Tech               2           0.03                  0.040
4     Finance               0           0.06                  0.065


# Frequency Encoding

In [4]:
# Map each sector to its relative frequency in the column
# (value_counts with normalize=True yields proportions instead of raw counts)
data['Sector_Freq_Encoded'] = data['Sector'].map(
    data['Sector'].value_counts(normalize=True)
)
print(data)

       Sector  Sector_Encoded  Stock_Returns  Sector_Target_Encoded  \
0        Tech               2           0.05                  0.040   
1  Healthcare               1           0.02                  0.020   
2     Finance               0           0.07                  0.065   
3        Tech               2           0.03                  0.040   
4     Finance               0           0.06                  0.065   

   Sector_Freq_Encoded  
0                  0.4  
1                  0.2  
2                  0.4  
3                  0.4  
4                  0.4  


# Binary Encoding

In [None]:
!pip install category-encoders

In [6]:
from category_encoders import BinaryEncoder

# Binary encoding: 'Sector' is replaced by the bit columns Sector_0 / Sector_1;
# all other columns of `data` pass through unchanged.
encoder = BinaryEncoder(cols=['Sector']).fit(data)
data_encoded = encoder.transform(data)
print(data_encoded)

   Sector_0  Sector_1  Sector_Encoded  Stock_Returns  Sector_Target_Encoded  \
0         0         1               2           0.05                  0.040   
1         1         0               1           0.02                  0.020   
2         1         1               0           0.07                  0.065   
3         0         1               2           0.03                  0.040   
4         1         1               0           0.06                  0.065   

   Sector_Freq_Encoded  
0                  0.4  
1                  0.2  
2                  0.4  
3                  0.4  
4                  0.4  


# Hash Encoding

In [7]:
from category_encoders import HashingEncoder

# Hash encoding: 'Sector' is hashed into n_components=4 output columns
# (col_0 .. col_3); distinct categories may land in the same column.
encoder = HashingEncoder(cols=['Sector'], n_components=4).fit(data)
data_encoded = encoder.transform(data)
print(data_encoded)

   col_0  col_1  col_2  col_3  Sector_Encoded  Stock_Returns  \
0      0      1      0      0               2           0.05   
1      0      0      0      1               1           0.02   
2      1      0      0      0               0           0.07   
3      0      1      0      0               2           0.03   
4      1      0      0      0               0           0.06   

   Sector_Target_Encoded  Sector_Freq_Encoded  
0                  0.040                  0.4  
1                  0.020                  0.2  
2                  0.065                  0.4  
3                  0.040                  0.4  
4                  0.065                  0.4  
