In [39]:
import numpy as np
import pandas as pd

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [25]:
# Toy dataset: a nominal feature (Color), an ordinal feature (Size),
# a numeric feature (Cost) and a class label.
records = [
    ['green', 'M', 10.1, 'class1'],
    ['red', 'XL', 12.2, 'class2'],
    ['blue', 'M', 11.5, 'class1'],
    ['green', 'L', 13.2, 'class2'],
]
df = pd.DataFrame(records, columns=['Color', 'Size', 'Cost', 'Label'])
df

Unnamed: 0,Color,Size,Cost,Label
0,green,M,10.1,class1
1,red,XL,12.2,class2
2,blue,M,11.5,class1
3,green,L,13.2,class2


In [26]:
def inversed_dict(d):
    """Return a new dict that maps each value of ``d`` back to its key.

    Parameters
    ----------
    d : dict
        Mapping to invert. Its values must be hashable and unique.

    Returns
    -------
    dict
        ``{value: key}`` for every ``key: value`` pair in ``d``.

    Raises
    ------
    ValueError
        If two keys share the same value. Such a mapping has no inverse, and
        silently keeping only one key would decode to the wrong key.
    """
    inverse = {}
    for key, value in d.items():
        if value in inverse:
            raise ValueError(
                'cannot invert mapping: value {!r} is shared by keys {!r} and {!r}'
                .format(value, inverse[value], key)
            )
        inverse[value] = key
    return inverse

In [27]:
# Ordinal encoding for the size feature: a larger size gets a larger integer.
size_mapping = {
    'XL': 5,
    'L': 4,
    'M': 3,
}
# Values with no entry in the mapping are passed through unchanged. This
# includes sizes already encoded by an earlier run of this cell, so re-running
# it no longer turns the whole column into NaN, and unknown sizes stay visible
# instead of being silently dropped.
df['Size'] = df['Size'].map(lambda size: size_mapping.get(size, size))

In [28]:
# Display the frame; the recorded output shows 'Size' as integer codes.
df

Unnamed: 0,Color,Size,Cost,Label
0,green,3,10.1,class1
1,red,5,12.2,class2
2,blue,3,11.5,class1
3,green,4,13.2,class2


In [29]:
# Give every distinct label a consecutive integer code, following the sorted
# order in which np.unique returns the labels.
unique_labels = np.unique(df['Label'])
class_mapping = dict(zip(unique_labels, range(len(unique_labels))))

In [32]:
# Swap each class name for its integer code from class_mapping.
label_codes = df['Label'].map(class_mapping)
df['Label'] = label_codes
df

Unnamed: 0,Color,Size,Cost,Label
0,green,3,10.1,0
1,red,5,12.2,1
2,blue,3,11.5,0
3,green,4,13.2,1


In [33]:
# Undo the encoding: invert the name -> code mapping and apply it to the codes.
code_to_label = inversed_dict(class_mapping)
df['Label'] = df['Label'].map(code_to_label)
df

Unnamed: 0,Color,Size,Cost,Label
0,green,3,10.1,class1
1,red,5,12.2,class2
2,blue,3,11.5,class1
3,green,4,13.2,class2


In [36]:
# Let scikit-learn learn the label -> integer encoding, then apply it.
class_le = LabelEncoder().fit(df['Label'])
y = class_le.transform(df['Label'])
y

array([0, 1, 0, 1])

In [37]:
# Map the integer codes in y back to the original label values.
class_le.inverse_transform(y)

array(['class1', 'class2', 'class1', 'class2'], dtype=object)

In [38]:
# Pull the feature columns into a NumPy array, then replace the colour names
# in column 0 with the integer codes produced by a LabelEncoder.
feature_columns = ['Color', 'Size', 'Cost']
X = df[feature_columns].values
color_le = LabelEncoder()
color_codes = color_le.fit_transform(X[:, 0])
X[:, 0] = color_codes
X

array([[1, 3, 10.1],
       [2, 5, 12.2],
       [0, 3, 11.5],
       [1, 4, 13.2]], dtype=object)

In [41]:
# OneHotEncoder's `categorical_features` argument was removed in
# scikit-learn 0.22, so the column selection is delegated to a
# ColumnTransformer: column 0 (the colour codes) is one-hot encoded and the
# remaining columns are passed through unchanged, after the indicator columns.
ohe = OneHotEncoder()
color_ct = ColumnTransformer(
    [('color', ohe, [0])],
    remainder='passthrough',
    sparse_threshold=0,  # never return a sparse matrix, whatever the density
)
# X has dtype=object, so cast the stacked result to a plain float array.
color_ct.fit_transform(X).astype(float)

array([[  0. ,   1. ,   0. ,   3. ,  10.1],
       [  0. ,   0. ,   1. ,   5. ,  12.2],
       [  1. ,   0. ,   0. ,   3. ,  11.5],
       [  0. ,   1. ,   0. ,   4. ,  13.2]])

In [45]:
# One-hot encode the string column with pandas. The dtype of the indicator
# columns is requested explicitly because the default differs between pandas
# versions (bool in pandas >= 2.0); float matches the recorded output and the
# scikit-learn result.
pd.get_dummies(df[['Size', 'Cost', 'Color']], dtype=float)

Unnamed: 0,Size,Cost,Color_blue,Color_green,Color_red
0,3,10.1,0.0,1.0,0.0
1,5,12.2,0.0,0.0,1.0
2,3,11.5,1.0,0.0,0.0
3,4,13.2,0.0,1.0,0.0
