One-Hot Encoding for Inference

In [None]:

import pandas as pd

# Toy dataset: one categorical feature containing three distinct fruit labels
data = dict(
    Fruit=['Apple', 'Orange', 'Banana', 'Apple', 'Banana', 'Orange'],
)

# Wrap the raw dictionary in a DataFrame (one row per label)
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Fruit
0,Apple
1,Orange
2,Banana
3,Apple
4,Banana
5,Orange


In [None]:


# One-hot encode the 'Fruit' column: get_dummies() produces one indicator
# column per distinct category.
# dtype=int is passed explicitly because pandas >= 2.0 defaults to bool
# (True/False); forcing integers yields 1/0 on every pandas version.
onehot_encoded = pd.get_dummies(df['Fruit'], dtype=int)
onehot_encoded

Unnamed: 0,Apple,Banana,Orange
0,1,0,0
1,0,0,1
2,0,1,0
3,1,0,0
4,0,1,0
5,0,0,1


In [None]:
# Place the indicator columns side by side with the original 'Fruit' column
df_encoded = pd.concat(objs=[df, onehot_encoded], axis='columns')

print(df_encoded)

    Fruit  Apple  Banana  Orange
0   Apple      1       0       0
1  Orange      0       0       1
2  Banana      0       1       0
3   Apple      1       0       0
4  Banana      0       1       0
5  Orange      0       0       1


Example with two categorical columns

In [None]:

import pandas as pd

# Sample data with two categorical columns
data = {
    'Fruit': ['Apple', 'Orange', 'Banana', 'Apple', 'Banana', 'Orange'],
    'Color': ['Red', 'Orange', 'Yellow', 'Red', 'Yellow', 'Orange']
}

# Convert the data into a DataFrame
df = pd.DataFrame(data)

print(df)

# One-hot encode both categorical columns; each resulting column is named
# '<source column>_<category>' (e.g. 'Fruit_Apple', 'Color_Red').
# dtype=int is passed explicitly because pandas >= 2.0 defaults to bool
# (True/False); forcing integers yields 1/0 on every pandas version.
onehot_encoded = pd.get_dummies(df, columns=['Fruit', 'Color'], dtype=int)

print(onehot_encoded)

    Fruit   Color
0   Apple     Red
1  Orange  Orange
2  Banana  Yellow
3   Apple     Red
4  Banana  Yellow
5  Orange  Orange
   Fruit_Apple  Fruit_Banana  Fruit_Orange  Color_Orange  Color_Red  \
0            1             0             0             0          1   
1            0             0             1             1          0   
2            0             1             0             0          0   
3            1             0             0             0          1   
4            0             1             0             0          0   
5            0             0             1             1          0   

   Color_Yellow  
0             0  
1             0  
2             1  
3             0  
4             1  
5             0  


One-hot encoding for a single record

In [None]:

import pandas as pd

# A single observation; each value sits in a one-element list so that the
# resulting DataFrame has exactly one row
record = dict(Fruit=['Apple'], Color=['Red'])

# Build the one-row DataFrame from the record
df_record = pd.DataFrame.from_dict(record)
df_record

Unnamed: 0,Fruit,Color
0,Apple,Red


In [None]:
# One-hot encode the single record. Only the categories present in this one
# row get a column, so the result has fewer columns than onehot_encoded.
# dtype=int is passed explicitly because pandas >= 2.0 defaults to bool
# (True/False); forcing integers yields 1/0 on every pandas version.
onehot_encoded_record = pd.get_dummies(df_record, dtype=int)

print(onehot_encoded_record)

   Fruit_Apple  Color_Red
0            1          1


Create an empty one-row DataFrame with the same columns as onehot_encoded, and fill it with zeros

In [None]:
# Build a one-row, all-zero template with exactly the columns of
# onehot_encoded. The frame is constructed directly because
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0;
# broadcasting the scalar 0 also gives integer columns.
empty_df_record = pd.DataFrame(0, index=range(1), columns=onehot_encoded.columns)

print(empty_df_record)

  Fruit_Apple Fruit_Banana Fruit_Orange Color_Orange Color_Red Color_Yellow
0           0            0            0            0         0            0


  empty_df_record = empty_df_record.append(pd.Series([0] * len(empty_df_record.columns), index=empty_df_record.columns), ignore_index=True)


Create a copy of the empty one-hot record and update it with the one-hot encoded record

In [None]:


# Duplicate the all-zero template so that later changes to the duplicate
# leave empty_df_record itself untouched
import copy

new_onehot_record = copy.deepcopy(empty_df_record)
new_onehot_record

Unnamed: 0,Fruit_Apple,Fruit_Banana,Fruit_Orange,Color_Orange,Color_Red,Color_Yellow
0,0,0,0,0,0,0


Update data

In [None]:
# Overwrite the matching columns of the zero-filled record, in place, with the
# values from the encoded single record (update() returns None)
new_onehot_record.update(other=onehot_encoded_record)
new_onehot_record

Unnamed: 0,Fruit_Apple,Fruit_Banana,Fruit_Orange,Color_Orange,Color_Red,Color_Yellow
0,1,0,0,0,1,0
