## Handling Numerical Data

In [1]:
import numpy as np
from sklearn import preprocessing

# One numeric feature with five observations, stored as a (5, 1) column
# because scikit-learn transformers expect 2-D input (rows = samples).
feature = np.array([-500.5, -200.2, 100, 0, 900.9]).reshape(-1, 1)

In [6]:
# Min-max scaler that maps the smallest observed value to 0 and the largest to 1.
minmax_scale = preprocessing.MinMaxScaler(
    feature_range=(0, 1),
)

In [7]:
# Learn the min/max of `feature`, then rescale that same array with them.
scaled_feature = minmax_scale.fit(feature).transform(feature)

# Bare last expression so the notebook displays the scaled column.
scaled_feature

array([[0.        ],
       [0.21428571],
       [0.42850007],
       [0.35714286],
       [1.        ]])

In [8]:
# Four observations of a single feature, laid out as a (4, 1) column.
X = np.array([100.0, 200.2, 500.5, 900.3]).reshape(-1, 1)

# StandardScaler centres the feature on its mean and divides by its
# standard deviation (zero mean, unit variance after transformation).
scalar = preprocessing.StandardScaler()

# Estimate the statistics from X, then apply them to X.
standized = scalar.fit(X).transform(X)

# Display the standardized column.
standized

array([[-1.0446243 ],
       [-0.72280615],
       [ 0.24168479],
       [ 1.52574566]])

In [9]:
# Normalizing
from sklearn.preprocessing import Normalizer

# Each row is one observation with two features; rows are rescaled
# independently of one another.
data = np.array([[0.5, 0.5],
                 [1.1, 3.4],
                 [1.8, 2.1],
                 [0.4, 0.7]])

# norm='l2' divides every row by its Euclidean length, so each output row
# has unit L2 norm.
normalize = Normalizer(norm='l2')

# Normalizer is stateless, so transform() alone would produce the same
# numbers. fit_transform() is used because scikit-learn only validates the
# constructor parameters inside fit().
normalize.fit_transform(data)

array([[0.70710678, 0.70710678],
       [0.30782029, 0.95144452],
       [0.65079137, 0.7592566 ],
       [0.49613894, 0.86824314]])

In [15]:
# Generating Polynomial and Interaction Features
from sklearn.preprocessing import PolynomialFeatures

# Four identical observations of the feature pair (2, 3), as a 4x2 integer array.
x = np.tile([2, 3], (4, 1))

# degree=2 keeps the original columns and adds their squares and pairwise
# product; include_bias=False leaves out the constant column of ones.
# For a row (a, b) the output columns are: a, b, a^2, a*b, b^2.
polynomial_interaction = PolynomialFeatures(include_bias=False, degree=2)

# Fit on x (records the input layout), then expand x itself.
poly = polynomial_interaction.fit(x).transform(x)

print(poly)

[[2. 3. 4. 6. 9.]
 [2. 3. 4. 6. 9.]
 [2. 3. 4. 6. 9.]
 [2. 3. 4. 6. 9.]]


In [17]:
# Transforming Features
from sklearn.preprocessing import FunctionTransformer
def add_ten(x):
    """Return ``x`` shifted up by ten.

    The shift uses ``+``, so a NumPy array argument has ten added to
    every element and keeps its shape.
    """
    shifted = x + 10
    return shifted

# Wrap the plain function so it can be used like any other scikit-learn transformer.
ten_transform = FunctionTransformer(func=add_ten)

# Applies add_ten to `x`, the 4x2 array defined in the polynomial-features cell.
ten_transform.transform(x)

array([[12, 13],
       [12, 13],
       [12, 13],
       [12, 13]])

In [18]:
# Detecting Outliers
from sklearn.datasets import make_blobs
from sklearn.covariance import EllipticEnvelope

# Simulate ten 2-D observations around a single centre; the seed makes the
# generated points repeatable.
features, _ = make_blobs(n_samples=10,
                         n_features=2,
                         centers=1,
                         random_state=1)

# Overwrite the first observation with extreme values so the data contains
# one known outlier.
features[0] = [1000, 1000]

# contamination=.1 is the expected share of outliers (1 of 10 rows here).
# EllipticEnvelope fits a robust covariance estimate that relies on random
# subsampling, so random_state is pinned to keep Restart & Run All reproducible.
outlier_detector = EllipticEnvelope(contamination=.1, random_state=1)

outlier_detector.fit(features)

# predict() labels each row: -1 for an outlier, 1 for an inlier.
outlier_detector.predict(features)

array([-1,  1,  1,  1,  1,  1,  1,  1,  1,  1])