In [1]:
import pandas as pd

In [2]:
# Build a small demo DataFrame: three categorical features plus a 0/1 Label column
data = dict(
    Category='A B A C B A C C B'.split(),
    Color='Red Green Blue Red Green Red Blue Red Green'.split(),
    Size='Small Medium Large Small Medium Large Medium Small Large'.split(),
    Label=[1, 0, 1, 0, 1, 0, 1, 1, 0],
)
df = pd.DataFrame(data)

In [3]:
# Example 1: One-hot encode the three categorical columns with pd.get_dummies,
# giving each source column its own short prefix
df_encoded = pd.get_dummies(
    df,
    columns=['Category', 'Color', 'Size'],
    prefix={'Category': 'Cat', 'Color': 'Col', 'Size': 'Size'},
)


In [4]:
# Example 2: Label encoding — factorize assigns an integer code to each distinct Size
# (index [0] selects the codes; the array of unique values is discarded)
df['Size_Label'] = df['Size'].factorize()[0]


In [5]:
# Example 3: Ordinal encoding — sizes are ranked 1..3 in the order listed below
size_mapping = {
    size: rank
    for rank, size in enumerate(['Small', 'Medium', 'Large'], start=1)
}
df['Size_Ordinal'] = df['Size'].map(size_mapping)


In [8]:
pip install category-encoders

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
# Example 4: Binary encoding of the Size column via the category_encoders library
import category_encoders as ce
encoder = ce.BinaryEncoder(cols=['Size']).fit(df)
df_binary_encoded = encoder.transform(df)


In [10]:
# Example 5: Count encoding of the Category column via category_encoders
encoder = ce.CountEncoder(cols=['Category']).fit(df)
df_count_encoded = encoder.transform(df)

In [11]:
# Example 6: Target encoding of the Color column, using Label as the target
encoder = ce.TargetEncoder(cols=['Color'])
df_target_encoded = encoder.fit_transform(
    df,
    y=df['Label'],
)

In [12]:
# Example 7: Mean encoding — every row receives the average Label of its Color group
mean_encoded = df['Label'].groupby(df['Color']).transform('mean')
df['Color_Mean_Encoded'] = mean_encoded

In [13]:
# Example 8: Frequency encoding — replace each Color with the number of rows sharing it
freq_encoding = df['Color'].map(df['Color'].value_counts().to_dict())
df['Color_Freq_Encoded'] = freq_encoding

In [14]:
# Example 9: Hashing encoding of the Category column (n_components=3) via category_encoders
encoder = ce.HashingEncoder(
    cols=['Category'],
    n_components=3,
).fit(df)
df_hash_encoded = encoder.transform(df)

In [15]:
# Example 10: Leave-one-out encoding of the Color column, using Label as the target
encoder = ce.LeaveOneOutEncoder(cols=['Color'])
df_loo_encoded = encoder.fit_transform(
    df,
    y=df['Label'],
)

In [16]:
# Preview the first three rows of df, including the encoded columns added above
print(df.head(n=3))

  Category  Color    Size  Label  Size_Label  Size_Ordinal  \
0        A    Red   Small      1           0             1   
1        B  Green  Medium      0           1             2   
2        A   Blue   Large      1           2             3   

   Color_Mean_Encoded  Color_Freq_Encoded  
0            0.500000                   4  
1            0.333333                   3  
2            1.000000                   2  
