In [1]:
!pip install feature_engine

Collecting feature_engine
  Downloading feature_engine-1.6.2-py2.py3-none-any.whl (328 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m328.9/328.9 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: feature_engine
Successfully installed feature_engine-1.6.2


In [2]:
import pandas as pd
import numpy as np

# MeanMedianImputer

In [3]:
# Toy frame of five cars; 'valor' is missing in the first and last rows so
# that there is something to impute.
df = pd.DataFrame(
    dict(
        id_carro=[123, 232, 312, 431, 521],
        valor=[np.nan, 10, 15, 30, np.nan],
    )
)
df

Unnamed: 0,id_carro,valor
0,123,
1,232,10.0
2,312,15.0
3,431,30.0
4,521,


In [4]:
from feature_engine.imputation import MeanMedianImputer

# Median imputation restricted to the 'valor' column: fit() learns the
# statistic from df and transform() returns the frame with the gaps filled.
mmi = MeanMedianImputer(imputation_method='median', variables=['valor'])
mmi.fit(df).transform(df)

Unnamed: 0,id_carro,valor
0,123,15.0
1,232,10.0
2,312,15.0
3,431,30.0
4,521,15.0


# CategoricalImputer

In [5]:
# Toy frame of five cars; the colour ('cor') is missing for two of them.
df = pd.DataFrame(
    dict(
        id_carro=[123, 232, 312, 431, 521],
        cor=['verde', np.nan, 'vermelho', np.nan, 'verde'],
    )
)
df

Unnamed: 0,id_carro,cor
0,123,verde
1,232,
2,312,vermelho
3,431,
4,521,verde


In [6]:
from feature_engine.imputation import CategoricalImputer

# Fill the missing entries of 'cor' with the placeholder label 'not_av'.
ci = CategoricalImputer(fill_value='not_av', variables=['cor'])
ci.fit(df).transform(df)

Unnamed: 0,id_carro,cor
0,123,verde
1,232,not_av
2,312,vermelho
3,431,not_av
4,521,verde


# One-Hot Encoding

In [7]:
# Toy frame with a fully populated categorical column ('cor') to encode.
df = pd.DataFrame(
    dict(
        id=[123, 232, 312, 431, 521],
        cor=['verde', 'vermelho', 'vermelho', 'azul', 'verde'],
    )
)
df

Unnamed: 0,id,cor
0,123,verde
1,232,vermelho
2,312,vermelho
3,431,azul
4,521,verde


In [8]:
from feature_engine.encoding import OneHotEncoder

# Expand 'cor' into one 0/1 indicator column per colour seen during fit;
# the other columns are passed through unchanged.
ohe = OneHotEncoder(variables=['cor'])
ohe.fit(df).transform(df)

Unnamed: 0,id,cor_verde,cor_vermelho,cor_azul
0,123,1,0,0
1,232,0,1,0
2,312,0,1,0
3,431,0,0,1
4,521,1,0,0


# Feature Scaling

## Standardisation

In [9]:
# Single numeric column with evenly spaced values 10, 20, ..., 60, used as
# the input for both scaling examples.
df = pd.DataFrame(dict(valores=list(range(10, 70, 10))))
df

Unnamed: 0,valores
0,10
1,20
2,30
3,40
4,50
5,60


In [10]:
from feature_engine.wrappers import SklearnTransformerWrapper
from sklearn.preprocessing import StandardScaler

# Wrap scikit-learn's StandardScaler so that it is applied only to the
# 'valores' column and the result comes back as a DataFrame.
std_scaler = SklearnTransformerWrapper(
    variables=['valores'],
    transformer=StandardScaler(),
)
std_scaler.fit(df).transform(df)

Unnamed: 0,valores
0,-1.46385
1,-0.87831
2,-0.29277
3,0.29277
4,0.87831
5,1.46385


In [11]:
# Manual calculation for the first row (value 10): z = (x - mean) / std.
# NumPy's std defaults to the population form (ddof=0), which is why the
# result agrees with the first value of the scaler output above.
(10 - np.mean(df.values)) / np.std(df.values)

-1.4638501094227996

## Min-Max Scaler

In [12]:
# Show the same single-column 'valores' frame again, as the input for the
# min-max scaling example below.
df

Unnamed: 0,valores
0,10
1,20
2,30
3,40
4,50
5,60


In [13]:
from feature_engine.wrappers import SklearnTransformerWrapper
from sklearn.preprocessing import MinMaxScaler

# Wrap scikit-learn's MinMaxScaler so that only 'valores' is rescaled; with
# the scaler's default settings the column is mapped onto the [0, 1] range.
minmax_scaler = SklearnTransformerWrapper(
    variables=['valores'],
    transformer=MinMaxScaler(),
)
minmax_scaler.fit(df).transform(df)

Unnamed: 0,valores
0,0.0
1,0.2
2,0.4
3,0.6
4,0.8
5,1.0
