In [37]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.base     import TransformerMixin

In [38]:
iris = load_iris()
df = pd.DataFrame(
    iris.data, 
    columns=iris.feature_names
    )

df['target'] = iris.target

# Map targets to target names
target_names = {
    0:'setosa',
    1:'versicolor', 
    2:'virginica'
}

# Escalonamento de dados

In [39]:
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333,1.0
std,0.828066,0.435866,1.765298,0.762238,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


### Padronizacao
$$
\frac{x_i - \mu}{\sigma}
$$

* $\sigma$ : desvio padrao
* $\mu$ : média

> Deixa o desvio padrão dos dados até 1, reduz o desvio padrão , altera a escala

In [40]:
class Padronizacao(TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X, y=None):
        return (X - np.mean(X))/np.std(X)

In [41]:
X = df[['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)']]
X = Padronizacao().fit_transform(X.values)

In [42]:
df1 = df.copy()
df1[['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)']] = X

In [43]:
df1.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,150.0,150.0,150.0,150.0,150.0
mean,1.205179,-0.206281,0.148695,-1.147592,1.0
std,0.41952,0.220821,0.894346,0.386169,0.819232
min,0.423286,-0.741954,-1.24858,-1.704543,0.0
25%,0.828587,-0.336653,-0.944604,-1.603218,0.0
50%,1.183225,-0.235328,0.448617,-1.096592,1.0
75%,1.4872,-0.08334,0.828587,-0.843279,2.0
max,2.247139,0.473949,1.740513,-0.488641,2.0


### Normalização
$$
\frac{X - X_{min}}{X_{max}-X_{min}}
$$
> Deixa os dados entre 0 e 1

In [44]:
class Normalizacao(TransformerMixin):
    def fit(self, X, y=None):
        return self
    def transform(self, X, y=None):
        return  (X - np.min(X))/(np.max(X) - np.min(X))

In [45]:
X = df[['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)']]
X = Normalizacao().fit_transform(X.values)

In [46]:
df2 = df.copy()
df2[['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)']] = X

In [47]:
df2.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
count,150.0,150.0,150.0,150.0,150.0
mean,0.736325,0.379145,0.468974,0.14094,1.0
std,0.106162,0.05588,0.22632,0.097723,0.819232
min,0.538462,0.24359,0.115385,0.0,0.0
25%,0.641026,0.346154,0.192308,0.025641,0.0
50%,0.730769,0.371795,0.544872,0.153846,1.0
75%,0.807692,0.410256,0.641026,0.217949,2.0
max,1.0,0.551282,0.871795,0.307692,2.0
