## 1. Nominal/OHE Encoding

In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

## Create a simple dataframe 
df = pd.DataFrame({
    'color': ['red', 'blue', 'green', 'green', 'red', 'blue']
})

df.head()

Unnamed: 0,color
0,red
1,blue
2,green
3,green
4,red


In [2]:
## create an instance of Onehotencoder
encoder=OneHotEncoder()

## perform fit and transform
encoded = encoder.fit_transform(df[['color']]).toarray()

In [3]:
import pandas as pd
encoder_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out())

encoder_df

Unnamed: 0,color_blue,color_green,color_red
0,0.0,0.0,1.0
1,1.0,0.0,0.0
2,0.0,1.0,0.0
3,0.0,1.0,0.0
4,0.0,0.0,1.0
5,1.0,0.0,0.0


In [4]:
# for new data
encoder.transform([['blue']]).toarray()



array([[1., 0., 0.]])

In [5]:
pd.concat([df, encoder_df], axis=1)

Unnamed: 0,color,color_blue,color_green,color_red
0,red,0.0,0.0,1.0
1,blue,1.0,0.0,0.0
2,green,0.0,1.0,0.0
3,green,0.0,1.0,0.0
4,red,0.0,0.0,1.0
5,blue,1.0,0.0,0.0


In [7]:
import seaborn as sns

df_tips = sns.load_dataset('tips')
df_tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [13]:
print(f'Sex: {df_tips['sex'].unique()}')
print(f'Smoker: {df_tips['smoker'].unique()}')
print(f'Day: {df_tips['day'].unique()}')
print(f'Time: {df_tips['time'].unique()}')

Sex: ['Female', 'Male']
Categories (2, object): ['Male', 'Female']
Smoker: ['No', 'Yes']
Categories (2, object): ['Yes', 'No']
Day: ['Sun', 'Sat', 'Thur', 'Fri']
Categories (4, object): ['Thur', 'Fri', 'Sat', 'Sun']
Time: ['Dinner', 'Lunch']
Categories (2, object): ['Lunch', 'Dinner']


In [14]:
encoder = OneHotEncoder()
encoded = encoder.fit_transform(df_tips[['sex']]).toarray()
df_sex_encoded = pd.DataFrame(encoded, columns=encoder.get_feature_names_out())
df_sex_encoded.head()

Unnamed: 0,sex_Female,sex_Male
0,1.0,0.0
1,0.0,1.0
2,0.0,1.0
3,0.0,1.0
4,1.0,0.0


In [15]:
encoder = OneHotEncoder()
encoded = encoder.fit_transform(df_tips[['day']]).toarray()
df_day_encoded = pd.DataFrame(encoded, columns=encoder.get_feature_names_out())
df_day_encoded.head()

Unnamed: 0,day_Fri,day_Sat,day_Sun,day_Thur
0,0.0,0.0,1.0,0.0
1,0.0,0.0,1.0,0.0
2,0.0,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0
4,0.0,0.0,1.0,0.0


## 2. Label Encoding

In [16]:
df.head()

Unnamed: 0,color
0,red
1,blue
2,green
3,green
4,red


In [17]:
from sklearn.preprocessing import LabelEncoder

lbl_encoder = LabelEncoder()
lbl_encoder.fit_transform(df[['color']])

  y = column_or_1d(y, warn=True)


array([2, 0, 1, 1, 2, 0])

In [18]:
lbl_encoder.transform([['red']])

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


array([2])

In [19]:
lbl_encoder.transform([['blue']])

  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


array([0])

## 3. Ordinal Encoding

In [20]:
from sklearn.preprocessing import OrdinalEncoder

# create a sample dataframe with an ordinal variable
df = pd.DataFrame({
    'size': ['small', 'medium', 'large', 'medium', 'small', 'large']
})

df

Unnamed: 0,size
0,small
1,medium
2,large
3,medium
4,small
5,large


In [21]:
## create an instance of ORdinalEncoder and then fit_transform
encoder = OrdinalEncoder(categories=[['small', 'medium', 'large']])

encoder.fit_transform(df[['size']])

array([[0.],
       [1.],
       [2.],
       [1.],
       [0.],
       [2.]])

In [22]:
encoder.transform([['small']])



array([[0.]])