In [1]:
import pandas as pd

In [2]:
df = pd.DataFrame([['green', 'M', 10.1, 'class2'],
                  ['red', 'L', 13.5, 'class1'],
                  ['blue', 'XL', 15.3, 'class2']])
df.columns = ['color', 'size', 'price', 'classlabel']

In [3]:
df.head()

Unnamed: 0,color,size,price,classlabel
0,green,M,10.1,class2
1,red,L,13.5,class1
2,blue,XL,15.3,class2


In [4]:
# we have both nominal and ordinal features here
#color : green, red, blue (NOminal)
#Size : M , L ,XL (ordinal)
#Note: There is no function that can automatically map features in order ( rank them)
# size feature size  M, L, XL 
size_map = {'M':1,'L':2,'XL':3}
df['size'] = df['size'].map(size_map)

In [5]:
df.head()

Unnamed: 0,color,size,price,classlabel
0,green,1,10.1,class2
1,red,2,13.5,class1
2,blue,3,15.3,class2


In [6]:
#class labels are not ordinal
import numpy as  np
class_map = {label:indx for indx,label in enumerate(np.unique(df['classlabel']))}

In [7]:
class_map

{'class1': 0, 'class2': 1}

In [8]:
df['classlabel'] = df['classlabel'].map(class_map)

In [9]:
print(df)

   color  size  price  classlabel
0  green     1   10.1           1
1    red     2   13.5           0
2   blue     3   15.3           1


In [10]:
# we can use label encoder
from sklearn.preprocessing import LabelEncoder
labenc = LabelEncoder()
clses = labenc.fit_transform(df['classlabel'])

In [11]:
df.classlabel = clses
print(df)

   color  size  price  classlabel
0  green     1   10.1           1
1    red     2   13.5           0
2   blue     3   15.3           1


In [12]:
print(labenc.inverse_transform(clses)) # we can get original class labels

[1 0 1]


# Nominal features
* One Hot Encoder on nominal features ( No order)

In [13]:
#One Hot Encoder 
#Although the color values don't come in any particular order
#learning algorithm will now assume that green is larger than blue, and red is larger than green.
#A common solution for this is one hot encoding
#The idea behind this approach is to create a new dummy feature for each unique value in the nominal feature column.
'''Here, we would convert the color
feature into three new features: blue, green, and red. '''
'''A blue example can be
encoded as blue=1, green=0, red=0.'''
X = df[['color', 'size', 'price']].values
print(X)

[['green' 1 10.1]
 ['red' 2 13.5]
 ['blue' 3 15.3]]


In [14]:
X.shape

(3, 3)

In [15]:
X =(X.reshape(1,-1))


In [16]:
print(X)

[['green' 1 10.1 'red' 2 13.5 'blue' 3 15.3]]


In [17]:
 # More convenient way than using one hot encoder
# 
pd.get_dummies(df[['price', 'color', 'size']],drop_first=True)

Unnamed: 0,price,size,color_green,color_red
0,10.1,1,1,0
1,13.5,2,0,1
2,15.3,3,0,0


In [18]:
df['rt'] = ['one','two','three']
df['rt1'] = ['Nn','Mm','th']

In [19]:
df.head()

Unnamed: 0,color,size,price,classlabel,rt,rt1
0,green,1,10.1,1,one,Nn
1,red,2,13.5,0,two,Mm
2,blue,3,15.3,1,three,th


In [20]:
col1 = list(df.columns).pop(3)
col = list(df.columns)
col.remove('classlabel')
print(col)

['color', 'size', 'price', 'rt', 'rt1']


In [21]:
pd.get_dummies(df[col],drop_first=True)

Unnamed: 0,size,price,color_green,color_red,rt_three,rt_two,rt1_Nn,rt1_th
0,1,10.1,1,0,0,0,1,0
1,2,13.5,0,1,0,1,0,0
2,3,15.3,0,0,1,0,0,1
