# **StandardScaler**

In [2]:
from sklearn.preprocessing import StandardScaler

In [4]:
# StandardScaler standardizes each feature (column): subtract the column mean,
# then divide by the column standard deviation.
#   copy=True      -> work on a copy and leave the input data untouched
#   copy=False     -> try to scale the input in place (not always possible,
#                     e.g. a plain Python list is converted to an array first)
#   with_mean=True -> centre each feature by subtracting its mean
#   with_std=True  -> scale each feature by its standard deviation
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

# Small illustrative matrix (rows = samples, columns = features). It is defined
# here so the cell runs on a fresh kernel; without it `x` would be undefined.
x_raw = [[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]]
x = scaler.fit_transform(x_raw)

In [6]:
# Example 1: fit the scaler on a tiny 4x2 dataset and inspect the learned means.
data = [[0, 0], [0, 0], [1, 1], [1, 1]]
scaler = StandardScaler()
scaler.fit(data)
print(scaler.mean_)  # per-column mean computed by fit()

[0.5 0.5]


In [7]:
# Scale `data` with the statistics the scaler learned in the preceding fit() call.
newdata = scaler.transform(data)
print(newdata)

[[-1. -1.]
 [-1. -1.]
 [ 1.  1.]
 [ 1.  1.]]


In [9]:
# Another way: fit_transform() learns the statistics and scales the data in one call.
newdata = scaler.fit_transform(data)
print(newdata)

[[-1. -1.]
 [-1. -1.]
 [ 1.  1.]
 [ 1.  1.]]


In [10]:
# Example 2: two features with large magnitudes and mixed signs.
data = [[3652, 1253], [21, 7745], [-3695, 150], [1525, -963]]
scaler = StandardScaler()
scaler.fit(data)
print(scaler.mean_)  # per-column mean computed by fit()

[ 375.75 2046.25]


In [12]:
# fit_transform() fits the scaler on `data` again and returns the standardized values.
newdata = scaler.fit_transform(data)
print(newdata)

[[ 1.22201689 -0.23453864]
 [-0.13231911  1.68493802]
 [-1.51835948 -0.56066045]
 [ 0.4286617  -0.88973893]]


# **MinMaxScaler**

In [13]:
from sklearn.preprocessing import MinMaxScaler

In [19]:
# MinMaxScaler maps each feature (column) linearly onto feature_range, so with
# feature_range=(0, 1) every column's minimum becomes 0 and its maximum becomes 1.
data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
scaler = MinMaxScaler(copy=True, feature_range=(0, 1))
new_data = scaler.fit_transform(data)
print(new_data)

[[0.   0.  ]
 [0.25 0.25]
 [0.5  0.5 ]
 [1.   1.  ]]


In [21]:
# Largest value seen in each column of the fitted data.
print(scaler.data_max_)

[ 1. 18.]


In [22]:
# Smallest value seen in each column of the fitted data.
print(scaler.data_min_)

[-1.  2.]


In [23]:
# Per-column range of the fitted data, i.e. data_max_ - data_min_.
print(scaler.data_range_)

[ 2. 16.]


# **Normalizer**

In [24]:
from sklearn.preprocessing import Normalizer

In [27]:
# Normalizer rescales every ROW (sample) independently.
# With norm='l1' each value is divided by the sum of the absolute values of
# its row, e.g. the first entry becomes 4 / (4 + 1 + 2 + 2) = 0.444...
# (These notes are comments rather than trailing string literals so that the
# cell does not echo a stray string as its result.)
data = [[4, 1, 2, 2],
        [1, 3, 9, 3],
        [5, 7, 5, 1]]
transformer = Normalizer(norm = 'l1')
new_ = transformer.fit_transform(data)
print(new_)

[[0.44444444 0.11111111 0.22222222 0.22222222]
 [0.0625     0.1875     0.5625     0.1875    ]
 [0.27777778 0.38888889 0.27777778 0.05555556]]


' ex(4/(4+1+2+2)), and so on... '

In [28]:
# L2 normalization: each row is divided by its Euclidean length
# (square root of the sum of squares), e.g. row 1 has length 5 -> 4/5 = 0.8.
data = [
    [4, 1, 2, 2],
    [1, 3, 9, 3],
    [5, 7, 5, 1],
]
transformer = Normalizer(norm='l2')
new_ = transformer.fit_transform(data)
print(new_)

[[0.8 0.2 0.4 0.4]
 [0.1 0.3 0.9 0.3]
 [0.5 0.7 0.5 0.1]]


In [30]:
# Max normalization: each row is divided by its largest absolute value,
# so the biggest entry of every row becomes 1.
data = [
    [4, 1, 2, 2],
    [1, 3, 9, 3],
    [5, 7, 5, 1],
]
transformer = Normalizer(norm='max')
new_ = transformer.fit_transform(data)
print(new_)

[[1.         0.25       0.5        0.5       ]
 [0.11111111 0.33333333 1.         0.33333333]
 [0.71428571 1.         0.71428571 0.14285714]]


# **MaxAbsScaler**

In [31]:
from sklearn.preprocessing import MaxAbsScaler

In [34]:
# MaxAbsScaler divides each COLUMN by its largest absolute value, so results
# lie in [-1, 1] and zeros stay zero.
data = [
    [1, 10, 2],
    [2, 0, 0],
    [5, 1, -1],
]
# Note: `transformer` holds the scaled array here, not the MaxAbsScaler instance.
transformer = MaxAbsScaler().fit_transform(data)
print(transformer)

[[ 0.2  1.   1. ]
 [ 0.4  0.   0. ]
 [ 1.   0.1 -0.5]]



# **Binarizer**

In [36]:
from sklearn.preprocessing import Binarizer

In [39]:
# Binarizer maps values strictly greater than `threshold` to 1 and all others to 0.
data = [
    [1.0, -1.0, -2.0],
    [2.0, 0.0, -1.0],
    [0.0, 1.0, -1.0],
]
# Note: `transformer` holds the binarized array here, not the Binarizer instance.
transformer = Binarizer(threshold=0.5).fit_transform(data)
print(transformer)

[[1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]]


# **PolynomialFeatures**

In [42]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

In [45]:
# Build a small 3x2 integer matrix (values 0..5) to use as input for
# PolynomialFeatures, which adds polynomial combinations of the input columns
# as extra feature columns.
data = np.arange(6).reshape(3, 2)
data

array([[0, 1],
       [2, 3],
       [4, 5]])

In [48]:
# Degree-2 polynomial expansion of the two input columns x1 and x2.
# Output columns, in order:
#   1st: bias     = 1
#   2nd: x1       (0, 2, 4)
#   3rd: x2       (1, 3, 5)
#   4th: x1^2     (0, 4, 16)
#   5th: x1 * x2  (0, 6, 20)
#   6th: x2^2     (1, 9, 25)
# (Written as comments rather than a trailing string literal so that the cell
# does not echo the notes as its result.)
scaler = PolynomialFeatures(degree=2, include_bias=True)
print(scaler.fit_transform(data))

[[ 1.  0.  1.  0.  0.  1.]
 [ 1.  2.  3.  4.  6.  9.]
 [ 1.  4.  5. 16. 20. 25.]]


'\nthe first column = Bias = 1\nthe second column = x1(0, 2, 4)\nthe third column = x2(1, 3, 5)\nthe fourth column = x1^2(0, 4, 16)\nthe fifth column = x2^2(1, 9, 25)\n'

In [51]:
# interaction_only=True keeps only products of distinct features (x1 * x2)
# and drops the pure powers (x1^2, x2^2); output columns are 1, x1, x2, x1*x2.
scaler = PolynomialFeatures(interaction_only = True)
print(scaler.fit_transform(data))

[[ 1.  0.  1.  0.]
 [ 1.  2.  3.  6.]
 [ 1.  4.  5. 20.]]
