In [1]:
import numpy as np

In [2]:
from sklearn.experimental import enable_iterative_imputer

In [3]:
from sklearn.impute import SimpleImputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import normalize
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder

In [4]:
# Toy datasets for the preprocessing demos in this notebook.

# Numeric matrix fed to the scalers and to row normalization.
X1 = [
    [1, -1, 2],
    [2, 0, 0],
    [0, 1, -1],
]

# Fully observed categorical data: three string columns.
X2 = [
    ['male', 'US', 'Safari'],
    ['female', 'Europe', 'Firefox'],
    ['female', 'Asia', 'Chrome'],
]

# A single categorical column containing one NaN entry.
X3 = [['male'], ['female'], [np.nan], ['female']]

# Two categorical columns mixing both missing markers (None and NaN).
X4 = [
    ['male', 'Safari'],
    ['female', None],
    [np.nan, 'Firefox'],
]

# Numeric pairs with one NaN in each column.
X5 = [
    [1, 2],
    [3, 6],
    [4, 8],
    [np.nan, 3],
    [7, np.nan],
]

# String pairs with one NaN in each column.
X6 = [
    ["a", "x"],
    [np.nan, "y"],
    ["a", np.nan],
    ["b", "y"],
]

## **Standard Scaler**

In [5]:
# Column-wise standardization of X1 with StandardScaler's default settings.
# The input and the scaled result are printed with two blank lines between.
scaler = StandardScaler()
X_new_ss = scaler.fit_transform(X1)
print(np.asarray(X1), X_new_ss, sep='\n\n\n')

[[ 1 -1  2]
 [ 2  0  0]
 [ 0  1 -1]]


[[ 0.         -1.22474487  1.33630621]
 [ 1.22474487  0.         -0.26726124]
 [-1.22474487  1.22474487 -1.06904497]]


## **Min Max Scaler**

In [6]:
# Rescale each column of X1 with MinMaxScaler's defaults, so every column
# spans 0..1. Print the input first, then the rescaled matrix.
min_max_scaler = MinMaxScaler()
X_new_mm = min_max_scaler.fit_transform(X1)
print(np.asarray(X1), X_new_mm, sep='\n\n\n')

[[ 1 -1  2]
 [ 2  0  0]
 [ 0  1 -1]]


[[0.5        0.         1.        ]
 [1.         0.5        0.33333333]
 [0.         1.         0.        ]]


## **Normalization**

In [7]:
# Row-wise L2 normalization: every row of X1 is scaled to unit Euclidean
# length. The '\n' item reproduces the blank lines of the original layout.
X_new_l2 = normalize(X1, norm="l2")
print(np.asarray(X1), '\n', "l2 normalization", X_new_l2, sep='\n')

[[ 1 -1  2]
 [ 2  0  0]
 [ 0  1 -1]]


l2 normalization
[[ 0.40824829 -0.40824829  0.81649658]
 [ 1.          0.          0.        ]
 [ 0.          0.70710678 -0.70710678]]


In [8]:
# Row-wise L1 normalization: the absolute values in each row of X1 sum to 1.
# The '\n' item reproduces the blank lines of the original layout.
X_new_l1 = normalize(X1, norm="l1")
print(np.asarray(X1), '\n', "l1 normalization", X_new_l1, sep='\n')

[[ 1 -1  2]
 [ 2  0  0]
 [ 0  1 -1]]


l1 normalization
[[ 0.25 -0.25  0.5 ]
 [ 1.    0.    0.  ]
 [ 0.    0.5  -0.5 ]]


## **Ordinal Encoder**

In [9]:
# Replace every category in X2 by a per-column integer code (returned as
# floats), then show the raw strings followed by their codes.
oe = OrdinalEncoder()
X_new1_oe = oe.fit_transform(X2)
print(np.asarray(X2), X_new1_oe, sep='\n\n\n')

[['male' 'US' 'Safari']
 ['female' 'Europe' 'Firefox']
 ['female' 'Asia' 'Chrome']]


[[1. 2. 2.]
 [0. 1. 1.]
 [0. 0. 0.]]


In [10]:
# Ordinal-encode a column that contains a missing value: with the default
# settings the NaN entry is passed through as NaN in the encoded output.
# NOTE: the first printed array shows the string 'nan' only because NumPy
# builds a string array from the mixed str/float list; the encoder itself
# receives the raw list with a real NaN.
oe = OrdinalEncoder()
X_new2_oe = oe.fit_transform(X3)
print(np.asarray(X3), X_new2_oe, sep='\n\n\n')

[['male']
 ['female']
 ['nan']
 ['female']]


[[ 1.]
 [ 0.]
 [nan]
 [ 0.]]


In [11]:
# Same column as before, but missing entries are now encoded as -1
# instead of being left as NaN.
oe = OrdinalEncoder(encoded_missing_value=-1)
X_new3_oe = oe.fit_transform(X3)
print(np.asarray(X3), X_new3_oe, sep='\n\n\n')

[['male']
 ['female']
 ['nan']
 ['female']]


[[ 1.]
 [ 0.]
 [-1.]
 [ 0.]]


## **One Hot Encoder**

In [12]:
# One-hot encode all three columns of X2: one indicator column per distinct
# category. The encoder output is converted to a dense array for printing.
ohe = OneHotEncoder()
X_new1_ohe = ohe.fit_transform(X2).toarray()
print(np.asarray(X2), X_new1_ohe, sep='\n\n\n')

[['male' 'US' 'Safari']
 ['female' 'Europe' 'Firefox']
 ['female' 'Asia' 'Chrome']]


[[0. 1. 0. 0. 1. 0. 0. 1.]
 [1. 0. 0. 1. 0. 0. 1. 0.]
 [1. 0. 1. 0. 0. 1. 0. 0.]]


In [13]:
# One-hot encode X2 while dropping the first category of every feature,
# which removes one redundant indicator column per feature.
ohe = OneHotEncoder(drop="first")
X_new2_ohe = ohe.fit_transform(X2).toarray()
print(np.asarray(X2), X_new2_ohe, sep='\n\n\n')

[['male' 'US' 'Safari']
 ['female' 'Europe' 'Firefox']
 ['female' 'Asia' 'Chrome']]


[[1. 0. 1. 0. 1.]
 [0. 1. 0. 1. 0.]
 [0. 0. 0. 0. 0.]]


In [14]:
# One-hot encode data holding both None and NaN. Each missing marker gets
# its own indicator column, so both features expand to three columns.
# handle_unknown="error" makes the encoder raise on categories that were
# not seen during fit.
ohe = OneHotEncoder(handle_unknown="error")
X_new3_ohe = ohe.fit_transform(X4).toarray()
print(np.asarray(X4), X_new3_ohe, sep='\n\n\n')

[['male' 'Safari']
 ['female' None]
 [nan 'Firefox']]


[[0. 1. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0. 1.]
 [0. 0. 1. 1. 0. 0.]]


## **Univariate Feature Imputation**

In [15]:
# Univariate imputation: each NaN in X5 is replaced by the mean of the
# observed values in its own column.
ufi = SimpleImputer(missing_values=np.nan, strategy="mean")
X_new1_ufi = ufi.fit_transform(X5)
print(np.asarray(X5), X_new1_ufi, sep='\n\n\n')

[[ 1.  2.]
 [ 3.  6.]
 [ 4.  8.]
 [nan  3.]
 [ 7. nan]]


[[1.   2.  ]
 [3.   6.  ]
 [4.   8.  ]
 [3.75 3.  ]
 [7.   4.75]]


In [16]:
# Univariate imputation for categorical data: each NaN in X6 is replaced by
# the most frequent value of its column.
# NOTE: the first printed array shows the string 'nan' only because NumPy
# builds a string array from the mixed str/float list.
ufi = SimpleImputer(strategy="most_frequent")
X_new2_ufi = ufi.fit_transform(X6)
print(np.asarray(X6), X_new2_ufi, sep='\n\n\n')

[['a' 'x']
 ['nan' 'y']
 ['a' 'nan']
 ['b' 'y']]


[['a' 'x']
 ['a' 'y']
 ['a' 'y']
 ['b' 'y']]


## **Multivariate Feature Imputation**

In [17]:
# Multivariate imputation: IterativeImputer estimates each missing entry of
# X5 from the other column (at most 10 rounds, fixed seed for repeatable
# results). The imputed matrix is rounded to whole numbers for display.
mfi = IterativeImputer(max_iter=10, random_state=0)
X_new_mfi = np.round(mfi.fit_transform(X5))
print(np.asarray(X5), X_new_mfi, sep='\n\n\n')

[[ 1.  2.]
 [ 3.  6.]
 [ 4.  8.]
 [nan  3.]
 [ 7. nan]]


[[ 1.  2.]
 [ 3.  6.]
 [ 4.  8.]
 [ 2.  3.]
 [ 7. 14.]]
