In [1]:
import pandas as pd

In [2]:
# Toy data set: a nominal feature (color), an ordinal feature (size),
# a numeric feature (price) and a string class label.
df = pd.DataFrame(
    data=[
        ['green', 'M', 10.1, 'class2'],
        ['red', 'L', 13.5, 'class1'],
        ['blue', 'XL', 15.3, 'class2'],
    ],
    columns=['color', 'size', 'price', 'classlabel'],
)
df

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class2
1,red,L,13.5,class1
2,blue,XL,15.3,class2


In [3]:
# Map sizes as an ordinal feature: 'M' < 'L' < 'XL' become the ranks 1 < 2 < 3.
size_mapping = {'XL': 3, 'L': 2, 'M': 1}
# Series.map turns every value that is not a key of the dict into NaN, so
# running this cell a second time would wipe the already-encoded column.
# Only encode while the column still holds the raw (non-numeric) labels.
if not pd.api.types.is_numeric_dtype(df['size']):
    df['size'] = df['size'].map(size_mapping)
df

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,class2
1,red,2,13.5,class1
2,blue,3,15.3,class2


In [4]:
# To undo the ordinal encoding, flip size_mapping so each integer code points
# back to its size label. The result is only displayed; df is left unchanged.
inv_size_mapping = dict(zip(size_mapping.values(), size_mapping.keys()))
df['size'].map(inv_size_mapping)

0     M
1     L
2    XL
Name: size, dtype: object

In [5]:
# Preview of the inverted mapping (integer code -> size label).
{code: label for label, code in size_mapping.items()}

{3: 'XL', 2: 'L', 1: 'M'}

## Encoding Class Labels

In [6]:
import numpy as np

In [7]:
# Give every distinct class label an integer code, numbered in the order
# in which np.unique returns the labels.
class_mapping = dict(
    (label, code) for code, label in enumerate(np.unique(df['classlabel']))
)
class_mapping

{'class1': 0, 'class2': 1}

In [8]:
# Inspect the class-label column before it is mapped to integer codes.
df['classlabel']

0    class2
1    class1
2    class2
Name: classlabel, dtype: object

In [9]:
# Replace the string class labels with their integer codes from class_mapping.
# Series.map yields NaN for values that are not keys of the dict, so re-running
# this cell on the already-encoded column would destroy it. Only encode while
# the column still holds the raw (non-numeric) labels.
if not pd.api.types.is_numeric_dtype(df['classlabel']):
    df['classlabel'] = df['classlabel'].map(class_mapping)
df['classlabel']

0    1
1    0
2    1
Name: classlabel, dtype: int64

In [10]:
# Display the full frame to check the classlabel column alongside the features.
df

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,1
1,red,2,13.5,0
2,blue,3,15.3,1


In [11]:
# Flip class_mapping (label -> code) into code -> label so the encoding
# can be undone.
inv_class_mapping = dict(zip(class_mapping.values(), class_mapping.keys()))
inv_class_mapping

{0: 'class1', 1: 'class2'}

In [12]:
# Restore the original string labels from the integer codes.
# Series.map yields NaN for values that are not keys of the dict, so re-running
# this cell after the labels are already strings would destroy the column.
# Only decode while the column still holds the numeric codes.
if pd.api.types.is_numeric_dtype(df['classlabel']):
    df['classlabel'] = df['classlabel'].map(inv_class_mapping)
df

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,class2
1,red,2,13.5,class1
2,blue,3,15.3,class2


In [23]:
# Same label -> integer encoding, this time with scikit-learn's LabelEncoder
# instead of a hand-written dict.
from sklearn.preprocessing import LabelEncoder

# Learn the set of labels first, then encode the column with the fitted encoder.
class_le = LabelEncoder().fit(df['classlabel'].values)
y = class_le.transform(df['classlabel'].values)
y

array([1, 0, 1])

In [24]:
# Round-trip check: decode the integer codes in y back to the class labels.
class_le.inverse_transform(y)

array(['class2', 'class1', 'class2'], dtype=object)

In [20]:
# Raw values of the class-label column, i.e. the input handed to the encoder.
df['classlabel'].values

array(['class2', 'class1', 'class2'], dtype=object)

In [35]:
#one-hot encoding
from sklearn.preprocessing import OneHotEncoder
X = df[['color','size','price']].values
color_ohe = OneHotEncoder()

In [34]:
# NOTE(review): the original cell was the incomplete expression `X[]`, which
# is a SyntaxError. This is reconstructed from the recorded output (color
# column shown as integer codes) — confirm it is the intended step.
# Label-encode the nominal color column (column 0 of X) in place, then show X.
color_le = LabelEncoder()  # LabelEncoder comes from the earlier sklearn import cell
X[:, 0] = color_le.fit_transform(X[:, 0])
X

array([[1, 1, 10.1],
       [2, 2, 13.5],
       [0, 3, 15.3]], dtype=object)

array([1, 2, 0])