# One Hot Encoding
One-hot encoding is a method for converting categorical variables into a numerical (binary) format. It creates a new column for each category, where 1 means the category is present and 0 means it is not. The primary purpose of one-hot encoding is to make categorical data usable by algorithms that require numerical input.

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

In [3]:
# Sample employee records: one numeric id column and two categorical columns.
data = {
    "Employee_id": [10, 20, 30, 40, 50],
    "Gender": ["M", "F", "F", "M", "F"],
    "Remarks": ["Good", "Nice", "Good", "Great", "Good"],
}
df = pd.DataFrame(data)

# Show the raw input (the dict itself, not the DataFrame) before encoding.
print(f"Original Employee Data: \n{data}\n")

# Encode both categorical columns in place of the originals. drop_first=True
# omits the first level of each column (Gender_F, Remarks_Good), so each
# category set is represented by one fewer indicator column.
columns_to_encode = ["Gender", "Remarks"]
df_pandas_encoded = pd.get_dummies(
    df,
    columns=columns_to_encode,
    drop_first=True,
)
print(f"One Hot encoded Data using pandas dummies\n {df_pandas_encoded}\n")

Original Employee Data: 
{'Employee_id': [10, 20, 30, 40, 50], 'Gender': ['M', 'F', 'F', 'M', 'F'], 'Remarks': ['Good', 'Nice', 'Good', 'Great', 'Good']}

One Hot encoded Data using pandas dummies
    Employee_id  Gender_M  Remarks_Great  Remarks_Nice
0           10      True          False         False
1           20     False          False          True
2           30     False          False         False
3           40      True           True         False
4           50     False          False         False



In [6]:
# Alternative approach: select only the categorical columns first, then encode
# the resulting sub-frame. Employee_id is therefore absent from the result.
categorical_subset = df[["Gender", "Remarks"]]
df_pandas = pd.get_dummies(categorical_subset, drop_first=True)
print(f"One hot encoding Data using pandas dummies in another way\n{df_pandas}\n")

One hot encoding Data using pandas dummies in another way
   Gender_M  Remarks_Great  Remarks_Nice
0      True          False         False
1     False          False          True
2     False          False         False
3      True           True         False
4     False          False         False



In [11]:
# One-hot encode the categorical columns with scikit-learn and join the
# indicator columns back onto the remaining (non-categorical) columns.
encoder = OneHotEncoder(sparse_output=False)  # dense ndarray instead of a sparse matrix
categorical_columns = ["Remarks", "Gender"]
one_hot_encoded = encoder.fit_transform(df[categorical_columns])

# Reuse df's index: pd.concat(axis=1) aligns rows by index label, so a fresh
# RangeIndex here would misalign rows (and introduce NaNs) whenever df does
# not carry the default 0..n-1 index.
one_hot_df = pd.DataFrame(
    one_hot_encoded,
    columns=encoder.get_feature_names_out(categorical_columns),
    index=df.index,
)

# Replace the original categorical columns with their encoded counterparts.
df_sklearn_encoded = pd.concat(
    [df.drop(columns=categorical_columns), one_hot_df],
    axis=1,
)
print(f"One Hot encoded Data using scikit learn:\n{df_sklearn_encoded}\n")

One Hot encoded Data using scikit learn:
   Employee_id  Remarks_Good  Remarks_Great  Remarks_Nice  Gender_F  Gender_M
0           10           1.0            0.0           0.0       0.0       1.0
1           20           0.0            0.0           1.0       1.0       0.0
2           30           1.0            0.0           0.0       1.0       0.0
3           40           0.0            1.0           0.0       0.0       1.0
4           50           1.0            0.0           0.0       1.0       0.0



In [14]:
# Same scikit-learn encoding, shown step by step: first the generated feature
# names, then the encoded frame on its own.
encoder = OneHotEncoder(sparse_output=False)  # dense ndarray instead of a sparse matrix
categorical_columns = ["Remarks", "Gender"]
one_hot_encoded = encoder.fit_transform(df[categorical_columns])

# Compute the output column names once and reuse them for both the printout
# and the DataFrame construction.
feature_names = encoder.get_feature_names_out(categorical_columns)
print(f"features of name: {feature_names}")

# Carry over df's index so that a later pd.concat(axis=1) with df lines up
# row-for-row even if df does not have the default 0..n-1 index.
one_hot_df = pd.DataFrame(one_hot_encoded, columns=feature_names, index=df.index)
print(f"One hot encoded dataframe\n{one_hot_df}\n")


features of name: ['Remarks_Good' 'Remarks_Great' 'Remarks_Nice' 'Gender_F' 'Gender_M']
One hot encoded dataframe
   Remarks_Good  Remarks_Great  Remarks_Nice  Gender_F  Gender_M
0           1.0            0.0           0.0       0.0       1.0
1           0.0            0.0           1.0       1.0       0.0
2           1.0            0.0           0.0       1.0       0.0
3           0.0            1.0           0.0       0.0       1.0
4           1.0            0.0           0.0       1.0       0.0



In [20]:
# Swap the original categorical columns for their one-hot encoded versions:
# drop them from df, then append the encoded columns side by side.
non_categorical_df = df.drop(columns=categorical_columns)
encoded_df = pd.concat([non_categorical_df, one_hot_df], axis=1)
print(encoded_df)

   Employee_id  Remarks_Good  Remarks_Great  Remarks_Nice  Gender_F  Gender_M
0           10           1.0            0.0           0.0       0.0       1.0
1           20           0.0            0.0           1.0       1.0       0.0
2           30           1.0            0.0           0.0       1.0       0.0
3           40           0.0            1.0           0.0       0.0       1.0
4           50           1.0            0.0           0.0       1.0       0.0
