# One Hot Encoding

One Hot Encoding is a method for converting categorical variables into a binary format. It creates a new column for each category, with the data represented in binary (1 if the row belongs to that category, 0 otherwise). The primary purpose of One Hot Encoding is to ensure that categorical data can be effectively used in machine learning models.

In [None]:
import pandas as pd

# Sample employee records: one numeric id column plus two categorical columns.
dataframe = pd.DataFrame({
    'Employee id': [10, 20, 15, 25, 30],
    'Gender': ['M', 'F', 'F', 'M', 'F'],
    'Remarks': ['Good', 'Nice', 'Good', 'Great', 'Nice'],
})

# Display the original DataFrame
print(f"Original Employee Data:\n{dataframe}\n")

# Use pd.get_dummies() to one-hot encode the categorical columns.
# drop_first=True omits the first level of each encoded column.
# dtype=int requests 0/1 integer indicators explicitly; without it the
# indicator dtype depends on the installed pandas version (bool in pandas 2.x).
df_pandas_encoded = pd.get_dummies(
    dataframe,
    columns=['Gender', 'Remarks'],
    drop_first=True,
    dtype=int,
)

print(f"One-Hot Encoded Data using Pandas:\n{df_pandas_encoded}\n")

In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Sample employee records: one numeric id column plus two categorical columns.
dataframe = pd.DataFrame({
    'Employee id': [10, 20, 15, 25, 30],
    'Gender': ['M', 'F', 'F', 'M', 'F'],
    'Remarks': ['Good', 'Nice', 'Good', 'Great', 'Nice'],
})
print(f"Original Employee Data:\n{dataframe}\n")


# Initialize OneHotEncoder.
# drop='first' omits the first category of each feature;
# sparse_output=False makes fit_transform return a dense array.
encoder = OneHotEncoder(drop='first', sparse_output=False)

# Select categorical columns to encode
categorical_columns = ['Gender', 'Remarks']
encoded_data = encoder.fit_transform(dataframe[categorical_columns])

# Create a DataFrame for the encoded data.
# Reuse the source index: pd.concat(axis=1) aligns rows on index labels, so a
# fresh default index would misalign rows if `dataframe` were ever re-indexed
# or filtered.
encoded_columns = encoder.get_feature_names_out(categorical_columns)
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoded_columns,
    index=dataframe.index,
)

# Combine the encoded data with the original DataFrame (excluding the
# original categorical columns, which the encoded columns replace)
final_df = pd.concat([dataframe.drop(columns=categorical_columns), encoded_df], axis=1)

print(f"One-Hot Encoded Data using sklearn:\n{final_df}\n")