In [1]:
import numpy as np
import pandas as pd 

In [2]:
def binary_encoding(df, column_name):
    """Replace a categorical column with its binary-encoded bit columns.

    Each distinct value of ``df[column_name]`` is assigned an integer code
    (0, 1, 2, ...) in order of first appearance. The code is then written
    out in base 2, most significant bit first, across the new columns
    ``{column_name}_0`` ... ``{column_name}_{k-1}``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified.
    column_name : str
        Name of the categorical column to encode.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``column_name`` removed and the bit columns
        appended after the remaining columns, sharing ``df``'s index.

    Raises
    ------
    KeyError
        If ``column_name`` is not a column of ``df``.

    Notes
    -----
    At least one bit column is always produced, so a column with a single
    distinct value (or no rows at all) yields one ``{column_name}_0`` column.
    Missing values get no special treatment here.
    """
    # Step 1: map categories to integer codes in order of first appearance.
    unique_categories = df[column_name].unique()
    category_map = {category: i for i, category in enumerate(unique_categories)}

    # Keep the codes in a local Series: writing a helper column into ``df``
    # would leak a side effect into the caller's DataFrame.
    codes = df[column_name].map(category_map)

    # Step 2: number of bits needed for the largest code. ``bit_length`` is
    # exact integer arithmetic; the floor of 1 keeps the output well-formed
    # when there is only one category (largest code 0) or no rows at all.
    max_int = max(category_map.values(), default=0)
    num_bits = max(1, int(max_int).bit_length())

    # Step 3: extract every bit in one vectorised pass. Shifting by
    # num_bits-1 ... 0 puts the most significant bit in the first column.
    code_values = codes.to_numpy(dtype=np.int64)
    shifts = np.arange(num_bits - 1, -1, -1, dtype=np.int64)
    binary_representation = (code_values[:, None] >> shifts) & 1

    # Step 4: assemble the bit columns on the original index and attach them
    # to the frame with the source column removed.
    binary_cols = [f'{column_name}_{i}' for i in range(num_bits)]
    df_binary = pd.DataFrame(binary_representation, columns=binary_cols, index=df.index)
    return pd.concat([df.drop(columns=[column_name]), df_binary], axis=1)




In [3]:

# Toy example: a single 'Color' column holding four distinct categories,
# which the hand-rolled encoder packs into two bit columns.
data = dict(
    Color=['Red', 'Green', 'Blue', 'Red', 'Green', 'Yellow', 'Blue', 'Yellow'],
)
df = pd.DataFrame(data)

# Encode the column and show the resulting bit columns.
encoded_df = binary_encoding(df, 'Color')
print(encoded_df)

   Color_0  Color_1
0        0        0
1        0        1
2        1        0
3        0        0
4        0        1
5        1        1
6        1        0
7        1        1


In [4]:
pip install category_encoders


Note: you may need to restart the kernel to use updated packages.


In [5]:
# Install category_encoders if not installed
# pip install category_encoders

import pandas as pd
import category_encoders as ce

# Small sample frame: one categorical column plus a numeric column.
df = pd.DataFrame(
    dict(
        Color=['Red', 'Green', 'Blue', 'Yellow', 'Green', 'Blue'],
        Value=[10, 15, 10, 20, 15, 10],
    )
)

print("Original DataFrame:", df, sep="\n")

# Binary-encode only the 'Color' column with the library encoder.
encoder = ce.BinaryEncoder(cols=['Color'])

# Learn the category mapping and apply it in one step.
df_encoded = encoder.fit_transform(df)

print("\nBinary Encoded DataFrame:", df_encoded, sep="\n")

Original DataFrame:
    Color  Value
0     Red     10
1   Green     15
2    Blue     10
3  Yellow     20
4   Green     15
5    Blue     10

Binary Encoded DataFrame:
   Color_0  Color_1  Color_2  Value
0        0        0        1     10
1        0        1        0     15
2        0        1        1     10
3        1        0        0     20
4        0        1        0     15
5        0        1        1     10
