In [1]:
import numpy as np
import sklearn
from sklearn.preprocessing import Normalizer, StandardScaler

In [2]:
# One numeric feature observed on five samples, stored as a (5, 1) column
# so that each row is a sample and the single column is the feature.
feature = np.array([-500.5, -100.1, 0, 100.1, 900.9]).reshape(-1, 1)

# Min Max scaling (Normalization)

In [3]:
from sklearn.preprocessing import MinMaxScaler

In [4]:
# Linear rescaling: the column minimum maps to 0 and the maximum to 1
# (this is the default range, spelled out here for readability).
min_max_scaler = MinMaxScaler(feature_range=(0, 1))

In [5]:
# Learn the column's min/max from `feature` and rescale it in one step;
# the smallest sample becomes 0 and the largest becomes 1.
min_max_scaler.fit_transform(feature)

array([[0.        ],
       [0.28571429],
       [0.35714286],
       [0.42857143],
       [1.        ]])

# Standardization

In [6]:
# Standardization: centre the feature on its mean and divide by its standard
# deviation, so the transformed column has mean 0 and standard deviation 1.
# The class is imported by name, matching how MinMaxScaler is brought in.
std_scaler = StandardScaler()

In [7]:
# Each sample becomes (x - column mean) / column standard deviation.
std_scaler.fit_transform(feature)

array([[-1.26687088],
       [-0.39316683],
       [-0.17474081],
       [ 0.0436852 ],
       [ 1.79109332]])

In [8]:
# Sanity check: a standardized feature has mean = 0, standard deviation = 1.
# Fit and transform once, then read both statistics from the same array
# instead of refitting the scaler for each one.
feature_standardized = std_scaler.fit_transform(feature)
feature_standardized.mean(), feature_standardized.std()

(0.0, 1.0)

# L1 and L2 norm

In [9]:
# Normalizer rescales each row (sample) to unit norm; with norm='l1' the
# absolute values within a row sum to 1.
L1 = Normalizer(norm='l1')

In [10]:
# Transpose the (5, 1) column into a single (1, 5) row so that the row-wise
# normalizers below treat all five values as one vector.
feature.T

array([[-500.5, -100.1,    0. ,  100.1,  900.9]])

In [11]:
# transform() is called directly on the freshly constructed normalizer (no
# prior fit call in this notebook). The absolute values of the resulting
# row add up to 1.
L1.transform(feature.T)

array([[-0.3125, -0.0625,  0.    ,  0.0625,  0.5625]])

In [12]:
# Normalizer rescales each row (sample) to unit norm; with norm='l2' the
# squared values within a row sum to 1 (unit Euclidean length).
L2 = Normalizer(norm='l2')

In [13]:
# Scale the single row to unit Euclidean length: the squares of the
# resulting values add up to 1.
L2.transform(feature.T)

array([[-0.48112522, -0.09622504,  0.        ,  0.09622504,  0.8660254 ]])

# Creating polynomial features

In [14]:
# Small 2x2 integer matrix reused as the input for the polynomial-feature,
# custom-transformer, binning and imputation examples that follow.
a = np.array([[2, 3], [4, 5]])

In [15]:
from sklearn.preprocessing import PolynomialFeatures

In [16]:
# Expand the input with every product of its columns up to total degree 2.
# include_bias=False leaves out the constant column of ones.
poly = PolynomialFeatures(
    degree=2,
    include_bias=False,
)

In [17]:
# For a row [x1, x2] the output columns are: x1, x2, x1^2, x1*x2, x2^2.
poly.fit_transform(a)

array([[ 2.,  3.,  4.,  6.,  9.],
       [ 4.,  5., 16., 20., 25.]])

In [18]:
# Same expansion, now up to total degree 3.
# NOTE(review): this rebinds `poly`, replacing the degree-2 transformer
# created earlier; re-running the degree-2 transform cell after this one
# would use the degree-3 transformer instead.
poly = PolynomialFeatures(degree=3,include_bias=False)

In [19]:
# The degree-2 columns come first, followed by the cubic terms
# x1^3, x1^2*x2, x1*x2^2, x2^3.
poly.fit_transform(a)

array([[  2.,   3.,   4.,   6.,   9.,   8.,  12.,  18.,  27.],
       [  4.,   5.,  16.,  20.,  25.,  64.,  80., 100., 125.]])

# Custom transformer function

In [20]:
def add_10(x):
    """Return ``x`` shifted up by ten.

    Only the ``+`` operator is used, so the function works on plain numbers
    and, element-wise, on NumPy arrays.
    """
    shift = 10
    return x + shift

In [21]:
from sklearn.preprocessing import FunctionTransformer

In [22]:
# Wrap the plain Python function so it can be used through the same
# fit/transform interface as the built-in transformers.
func = FunctionTransformer(func=add_10)

In [23]:
# Applies add_10 to the array: every entry of `a` is increased by 10.
func.fit_transform(a)

array([[12, 13],
       [14, 15]])

# Binning (Discretization)

In [24]:
# Show the input again for reference before binning it.
a

array([[2, 3],
       [4, 5]])

In [25]:
# Replace each value by the index of the bin it falls into, using the
# edges 3 and 4:  x < 3 -> 0,  3 <= x < 4 -> 1,  x >= 4 -> 2.
# right=False (the default) makes each bin include its left edge.
np.digitize(a, bins=[3, 4], right=False)

array([[0, 1],
       [2, 2]], dtype=int64)

# Impute missing values

In [26]:
from sklearn.impute import SimpleImputer

In [27]:
# Imputer that fills NaN entries with the mean of their column
# (np.nan is the default missing-value marker, spelled out for clarity).
mean_imputer = SimpleImputer(missing_values=np.nan, strategy='mean')

In [28]:
# NOTE(review): `a` contains no missing values, so nothing is imputed here;
# the result holds the same numbers as `a`, only shown as floats.
# To see the imputer act, pass an array with a NaN in it, e.g.
# np.array([[2, np.nan], [4, 5]]).
mean_imputer.fit_transform(a)

array([[2., 3.],
       [4., 5.]])