# Feature Engineering

## Configurações

In [2]:
# Configuration: make the project modules importable from this notebook.
import importlib  # used when project packages need to be re-imported

import sys

# Register the project directories only once, so re-running this cell does not
# keep appending duplicate entries to sys.path.
for project_path in ("../", "../ml-project-template"):
    if project_path not in sys.path:
        sys.path.append(project_path)

import config
import utils
import data_manager as data_mgr
import feature_manager as feat_mgr

In [48]:
# Shared helpers used by every section below: `data_manager` loads and saves
# the datasets, `feature_manager` fits and applies the feature transformations.
data_manager = data_mgr.DataManager()
feature_manager = feat_mgr.FeatureEngineer()

## v0_basico (Apenas remoção de variáveis indesejadas)

Essa versão do database é útil para o catboost, uma vez que não é necessário fazer o tratamento das variáveis categóricas, que são tratadas automaticamente dentro do algoritmo.

Ela serve como uma versão de benchmark para o catboost, permitindo identificar avanços posteriores obtidos com a incorporação de técnicas de feature engineering.

In [15]:
# Load the raw train and test files through the data manager.
raw_train_df, raw_test_df = data_manager.load_raw_data(
    filelist=["train.pkl", "test.pkl"],
)

train.pkl - formato: (8000, 14)
test.pkl - formato: (2000, 14)


In [34]:
# Drop the columns the feature manager treats as problematic from the raw
# training data, then display the resulting frame.
train_processed_v0 = feature_manager.remove_problematic_columns(
    raw_train_df,
)
train_processed_v0

️ Removendo colunas: ['RowNumber', 'CustomerId', 'Surname']


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
8769,622,France,Male,31,7,0.00,1,1,0,35408.77,0
3920,634,Germany,Male,43,3,212696.32,1,1,0,115268.86,0
3983,626,France,Female,44,2,0.00,1,0,1,173117.22,1
3944,612,France,Female,31,8,117989.76,1,1,1,54129.86,0
3279,652,Spain,Male,37,7,0.00,2,1,0,68789.93,0
...,...,...,...,...,...,...,...,...,...,...,...
3218,663,France,Male,36,10,0.00,2,1,0,136349.55,0
4134,678,France,Male,43,5,102338.19,1,1,1,79649.62,0
4453,809,Germany,Male,33,8,148055.74,1,0,0,199203.21,0
9346,716,France,Male,41,9,0.00,1,1,1,113267.48,0


In [35]:
# Same column removal as for the training data, applied to the raw test data.
test_processed_v0 = feature_manager.remove_problematic_columns(
    raw_test_df,
)
test_processed_v0

️ Removendo colunas: ['RowNumber', 'CustomerId', 'Surname']


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
4559,528,Germany,Female,62,7,133201.17,1,0,0,168507.68,1
2531,607,Germany,Male,47,4,148826.32,1,1,1,79450.61,0
7504,654,Spain,Female,32,2,0.00,1,1,1,51972.92,1
4879,783,Spain,Female,44,3,81811.71,1,1,0,164213.53,1
6418,754,Spain,Female,27,8,0.00,2,0,0,121821.16,0
...,...,...,...,...,...,...,...,...,...,...,...
7997,465,Germany,Male,24,6,156007.09,1,1,0,191368.37,0
2782,646,France,Male,24,8,0.00,2,0,0,92612.88,0
8552,468,France,Female,22,1,76318.64,1,1,1,194783.12,0
5562,620,Spain,Male,42,9,121490.05,1,1,1,29296.74,0


In [36]:
# Persist the v0 train/test frames under the feature-set name "v0_basico".
data_manager.save_processed_data(
    train_processed_v0,
    test_processed_v0,
    feature_set_name="v0_basico",
)

 Dados salvos em: D:\mba\Data Science e Analytics (USP-Esalq)\99 - TCC\Projeto\notebooks\..\data\processed\v0_basico


## v1_one-hot_encoding (v0_basico + one-hot encoding aplicado às variáveis categóricas)

O database v1_one-hot_encoding é equivalente ao database v0_basico, mas destinado a outros algoritmos que, assim como o catboost, não precisam de normalização das variáveis numéricas, mas que necessitam de one-hot encoding para sua utilização (ex.: xgboost, lightgbm, random forest).

In [37]:
# Reload the saved "v0_basico" feature set as the starting point for v1.
train_v0, test_v0 = data_manager.load_processed_data(
    feature_set="v0_basico",
)

 Carregando features: v0_basico
 FeatureSet info: 11 features


In [38]:
# Fit the one-hot encoder on the training split only; the test split is later
# transformed with this same fitted encoder. The trailing semicolon hides the
# returned object's repr, whose memory address changes on every run.
feature_manager.fit_onehot_encoder(train_v0);

<feature_manager.FeatureEngineer at 0x126e82e9a90>

In [39]:
# Apply the fitted one-hot encoder to the v0 training data and display it.
train_processed_v1 = feature_manager.transform_with_onehot(
    train_v0,
)
train_processed_v1

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Male,HasCrCard_1,IsActiveMember_1
8769,622,31,7,0.00,1,35408.77,0,1.0,0.0,0.0,1.0,1.0,0.0
3920,634,43,3,212696.32,1,115268.86,0,0.0,1.0,0.0,1.0,1.0,0.0
3983,626,44,2,0.00,1,173117.22,1,1.0,0.0,0.0,0.0,0.0,1.0
3944,612,31,8,117989.76,1,54129.86,0,1.0,0.0,0.0,0.0,1.0,1.0
3279,652,37,7,0.00,2,68789.93,0,0.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3218,663,36,10,0.00,2,136349.55,0,1.0,0.0,0.0,1.0,1.0,0.0
4134,678,43,5,102338.19,1,79649.62,0,1.0,0.0,0.0,1.0,1.0,1.0
4453,809,33,8,148055.74,1,199203.21,0,0.0,1.0,0.0,1.0,0.0,0.0
9346,716,41,9,0.00,1,113267.48,0,1.0,0.0,0.0,1.0,1.0,1.0


In [41]:
# Encode the v0 test data with the encoder fitted on the training data
# (no refit on the test split).
test_processed_v1 = feature_manager.transform_with_onehot(
    test_v0,
)
test_processed_v1

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Male,HasCrCard_1,IsActiveMember_1
4559,528,62,7,133201.17,1,168507.68,1,0.0,1.0,0.0,0.0,0.0,0.0
2531,607,47,4,148826.32,1,79450.61,0,0.0,1.0,0.0,1.0,1.0,1.0
7504,654,32,2,0.00,1,51972.92,1,0.0,0.0,1.0,0.0,1.0,1.0
4879,783,44,3,81811.71,1,164213.53,1,0.0,0.0,1.0,0.0,1.0,0.0
6418,754,27,8,0.00,2,121821.16,0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7997,465,24,6,156007.09,1,191368.37,0,0.0,1.0,0.0,1.0,1.0,0.0
2782,646,24,8,0.00,2,92612.88,0,1.0,0.0,0.0,1.0,0.0,0.0
8552,468,22,1,76318.64,1,194783.12,0,1.0,0.0,0.0,0.0,1.0,1.0
5562,620,42,9,121490.05,1,29296.74,0,0.0,0.0,1.0,1.0,1.0,1.0


In [42]:
# Persist the one-hot encoded train/test frames as "v1_one-hot_encoding".
data_manager.save_processed_data(
    train_processed_v1,
    test_processed_v1,
    feature_set_name="v1_one-hot_encoding",
)

 Dados salvos em: D:\mba\Data Science e Analytics (USP-Esalq)\99 - TCC\Projeto\notebooks\..\data\processed\v1_one-hot_encoding


## v2_one-hot_encoding_plus_normalizacao

Partindo do database v1, adiciona-se a padronização z-score (StandardScaler do sklearn) às variáveis numéricas.

Portanto, é composto por one-hot encoding em variáveis categóricas + z-score em variáveis numéricas.

In [51]:
# Reload the saved "v1_one-hot_encoding" feature set as the base for v2.
train_v1, test_v1 = data_manager.load_processed_data(
    feature_set="v1_one-hot_encoding",
)

 Carregando features: v1_one-hot_encoding
 FeatureSet info: 13 features


In [52]:
# Fit the standard scaler on the v1 training split only; the test split is
# later transformed with this same fitted scaler. The trailing semicolon hides
# the returned object's repr, whose memory address changes on every run.
feature_manager.fit_standard_scaler(train_v1);

<feature_manager.FeatureEngineer at 0x126e82ea930>

In [53]:
# Scale the v1 training data with the fitted standard scaler and display it.
train_processed_v2 = feature_manager.transform_with_standard_scaler(
    train_v1,
)
train_processed_v2

Unnamed: 0,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Male,HasCrCard_1,IsActiveMember_1,CreditScore,Age,Tenure,Balance,NumOfProducts,EstimatedSalary
8769,0,1.0,0.0,0.0,1.0,1.0,0.0,-0.297153,-0.759691,0.687591,-1.227884,-0.912204,-1.124006
3920,0,0.0,1.0,0.0,1.0,1.0,0.0,-0.172801,0.386021,-0.695631,2.184041,-0.912204,0.269924
3983,1,1.0,0.0,0.0,0.0,0.0,1.0,-0.255702,0.481497,-1.041437,-1.227884,-0.912204,1.279647
3944,0,1.0,0.0,0.0,0.0,1.0,1.0,-0.400780,-0.759691,1.033397,0.664825,-0.912204,-0.797236
3279,0,0.0,0.0,1.0,1.0,1.0,0.0,0.013728,-0.186835,0.687591,-1.227884,0.798045,-0.541349
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3218,0,1.0,0.0,0.0,1.0,1.0,0.0,0.127718,-0.282311,1.725009,-1.227884,0.798045,0.637880
4134,0,1.0,0.0,0.0,1.0,1.0,1.0,0.283158,0.386021,-0.004020,0.413754,-0.912204,-0.351797
4453,0,0.0,1.0,0.0,1.0,0.0,0.0,1.640673,-0.568739,1.033397,1.147123,-0.912204,1.734968
9346,0,1.0,0.0,0.0,1.0,1.0,1.0,0.676941,0.195069,1.379203,-1.227884,-0.912204,0.234991


In [55]:
# Scale the v1 test data with the scaler fitted on the training data
# (no refit on the test split).
test_processed_v2 = feature_manager.transform_with_standard_scaler(
    test_v1,
)
test_processed_v2

Unnamed: 0,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Male,HasCrCard_1,IsActiveMember_1,CreditScore,Age,Tenure,Balance,NumOfProducts,EstimatedSalary
4559,1,0.0,1.0,0.0,0.0,0.0,0.0,-1.271248,2.200065,0.687591,0.908836,-0.912204,1.199189
2531,0,0.0,1.0,0.0,1.0,1.0,1.0,-0.452594,0.767925,-0.349826,1.159484,-0.912204,-0.355271
7504,1,0.0,0.0,1.0,0.0,1.0,1.0,0.034453,-0.664215,-1.041437,-1.227884,-0.912204,-0.834884
4879,1,0.0,0.0,1.0,0.0,1.0,0.0,1.371242,0.481497,-0.695631,0.084482,-0.912204,1.124236
6418,0,0.0,0.0,1.0,0.0,0.0,0.0,1.070724,-1.141595,1.033397,-1.227884,0.798045,0.384292
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7997,0,0.0,1.0,0.0,1.0,1.0,0.0,-1.924098,-1.428022,0.341786,1.274673,-0.912204,1.598214
2782,0,1.0,0.0,0.0,1.0,0.0,0.0,-0.048448,-1.428022,1.033397,-1.227884,0.798045,-0.125528
8552,0,1.0,0.0,0.0,0.0,1.0,1.0,-1.893010,-1.618974,-1.387243,-0.003634,-0.912204,1.657817
5562,0,0.0,0.0,1.0,1.0,1.0,1.0,-0.317879,0.290545,1.379203,0.720974,-0.912204,-1.230689


In [56]:
# Persist the encoded + scaled train/test frames as
# "v2_one-hot_encoding_plus_normalizacao".
data_manager.save_processed_data(
    train_processed_v2,
    test_processed_v2,
    feature_set_name="v2_one-hot_encoding_plus_normalizacao",
)

 Dados salvos em: D:\mba\Data Science e Analytics (USP-Esalq)\99 - TCC\Projeto\notebooks\..\data\processed\v2_one-hot_encoding_plus_normalizacao


## v3_one-hot_encoding_plus_normalizacao_plus_poly

Partindo de v2, criam-se variáveis numéricas a partir do produto dois a dois (incluindo os termos quadráticos, ex.: Tenure^2) das variáveis numéricas iniciais normalizadas.

In [57]:
# Reload the saved v2 feature set as the base for the polynomial features.
train_v2, test_v2 = data_manager.load_processed_data(
    feature_set="v2_one-hot_encoding_plus_normalizacao",
)

 Carregando features: v2_one-hot_encoding_plus_normalizacao
 FeatureSet info: 13 features


In [58]:
# Fit the polynomial-feature step on the v2 training split only; the test
# split is later transformed with this same fitted step. The trailing semicolon
# hides the returned object's repr, whose memory address changes on every run.
feature_manager.fit_poly(train_v2);

<feature_manager.FeatureEngineer at 0x126e82ea930>

In [59]:
# Add the polynomial terms to the v2 training data and display the result.
train_processed_v3 = feature_manager.transform_with_poly(
    train_v2,
)
train_processed_v3

Unnamed: 0,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Male,HasCrCard_1,IsActiveMember_1,CreditScore,Age,Tenure,...,Tenure^2,Tenure Balance,Tenure NumOfProducts,Tenure EstimatedSalary,Balance^2,Balance NumOfProducts,Balance EstimatedSalary,NumOfProducts^2,NumOfProducts EstimatedSalary,EstimatedSalary^2
8769,0,1.0,0.0,0.0,1.0,1.0,0.0,-0.297153,-0.759691,0.687591,...,0.472782,-0.844283,-0.627224,-0.772857,1.507699,1.120081,1.380148,0.832116,1.025322,1.263388
3920,0,0.0,1.0,0.0,1.0,1.0,0.0,-0.172801,0.386021,-0.695631,...,0.483903,-1.519288,0.634558,-0.187768,4.770037,-1.992291,0.589525,0.832116,-0.246226,0.072859
3983,1,1.0,0.0,0.0,0.0,0.0,1.0,-0.255702,0.481497,-1.041437,...,1.084591,1.278764,0.950003,-1.332672,1.507699,1.120081,-1.571258,0.832116,-1.167299,1.637496
3944,0,1.0,0.0,0.0,0.0,1.0,1.0,-0.400780,-0.759691,1.033397,...,1.067910,0.687028,-0.942669,-0.823861,0.441992,-0.606456,-0.530022,0.832116,0.727241,0.635585
3279,0,0.0,0.0,1.0,1.0,1.0,0.0,0.013728,-0.186835,0.687591,...,0.472782,-0.844283,0.548729,-0.372227,1.507699,-0.979906,0.664714,0.636876,-0.432021,0.293059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3218,0,1.0,0.0,0.0,1.0,1.0,0.0,0.127718,-0.282311,1.725009,...,2.975655,-2.118111,1.376634,1.100349,1.507699,-0.979906,-0.783243,0.636876,0.509057,0.406891
4134,0,1.0,0.0,0.0,1.0,1.0,1.0,0.283158,0.386021,-0.004020,...,0.000016,-0.001663,0.003667,0.001414,0.171192,-0.377428,-0.145557,0.832116,0.320911,0.123761
4453,0,0.0,1.0,0.0,1.0,0.0,0.0,1.640673,-0.568739,1.033397,...,1.067910,1.185433,-0.942669,1.792912,1.315890,-1.046410,1.990221,0.832116,-1.582645,3.010115
9346,0,1.0,0.0,0.0,1.0,1.0,1.0,0.676941,0.195069,1.379203,...,1.902201,-1.693501,-1.258114,0.324100,1.507699,1.120081,-0.288541,0.832116,-0.214359,0.055221


In [60]:
# Add the polynomial terms to the v2 test data using the step fitted on the
# training data (no refit on the test split).
test_processed_v3 = feature_manager.transform_with_poly(
    test_v2,
)
test_processed_v3

Unnamed: 0,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Male,HasCrCard_1,IsActiveMember_1,CreditScore,Age,Tenure,...,Tenure^2,Tenure Balance,Tenure NumOfProducts,Tenure EstimatedSalary,Balance^2,Balance NumOfProducts,Balance EstimatedSalary,NumOfProducts^2,NumOfProducts EstimatedSalary,EstimatedSalary^2
4559,1,0.0,1.0,0.0,0.0,0.0,0.0,-1.271248,2.200065,0.687591,...,0.472782,0.624908,-0.627224,0.824552,0.825983,-0.829044,1.089866,0.832116,-1.093905,1.438054
2531,0,0.0,1.0,0.0,1.0,1.0,1.0,-0.452594,0.767925,-0.349826,...,0.122378,-0.405617,0.319112,0.124283,1.344402,-1.057685,-0.411931,0.832116,0.324079,0.126217
7504,1,0.0,0.0,1.0,0.0,1.0,1.0,0.034453,-0.664215,-1.041437,...,1.084591,1.278764,0.950003,0.869479,1.507699,1.120081,1.025141,0.832116,0.761585,0.697032
4879,1,0.0,0.0,1.0,0.0,1.0,0.0,1.371242,0.481497,-0.695631,...,0.483903,-0.058768,0.634558,-0.782054,0.007137,-0.077065,0.094978,0.832116,-1.025532,1.263906
6418,0,0.0,0.0,1.0,0.0,0.0,0.0,1.070724,-1.141595,1.033397,...,1.067910,-1.268892,0.824697,0.397126,1.507699,-0.979906,-0.471866,0.636876,0.306682,0.147680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7997,0,0.0,1.0,0.0,1.0,1.0,0.0,-1.924098,-1.428022,0.341786,...,0.116817,0.435665,-0.311778,0.546247,1.624790,-1.162761,2.037199,0.832116,-1.457897,2.554288
2782,0,1.0,0.0,0.0,1.0,0.0,0.0,-0.048448,-1.428022,1.033397,...,1.067910,-1.268892,0.824697,-0.129720,1.507699,-0.979906,0.154134,0.636876,-0.100177,0.015757
8552,0,1.0,0.0,0.0,0.0,1.0,1.0,-1.893010,-1.618974,-1.387243,...,1.924443,0.005041,1.265448,-2.299795,0.000013,0.003315,-0.006024,0.832116,-1.512267,2.748358
5562,0,0.0,0.0,1.0,1.0,1.0,1.0,-0.317879,0.290545,1.379203,...,1.902201,0.994370,-1.258114,-1.697370,0.519804,-0.657676,-0.887295,0.832116,1.122639,1.514595


In [61]:
# Persist the encoded + scaled + polynomial train/test frames as
# "v3_one-hot_encoding_plus_normalizacao_plus_poly".
data_manager.save_processed_data(
    train_processed_v3,
    test_processed_v3,
    feature_set_name="v3_one-hot_encoding_plus_normalizacao_plus_poly",
)

 Dados salvos em: D:\mba\Data Science e Analytics (USP-Esalq)\99 - TCC\Projeto\notebooks\..\data\processed\v3_one-hot_encoding_plus_normalizacao_plus_poly


## v4_normalizacao_plus_poly

Partindo de v0, aplica-se a padronização z-score às variáveis numéricas e, em seguida, criam-se variáveis numéricas a partir do produto dois a dois (incluindo os termos quadráticos) das variáveis numéricas normalizadas.

Parte-se de v0 para manter as variáveis categóricas sem one-hot encoding para o catboost.

In [66]:
# Reload "v0_basico" so this branch starts from the frames whose categorical
# columns have not been one-hot encoded.
train_v0, test_v0 = data_manager.load_processed_data(
    feature_set="v0_basico",
)

 Carregando features: v0_basico
 FeatureSet info: 11 features


In [67]:
# Fit the standard scaler on the v0 training split only; the test split is
# later transformed with this same fitted scaler. The trailing semicolon hides
# the returned object's repr, whose memory address changes on every run.
feature_manager.fit_standard_scaler(train_v0);

<feature_manager.FeatureEngineer at 0x126e82ea930>

In [68]:
# Intermediate (scaled-only) training frame for v4; the polynomial terms are
# added to it in a later cell.
train_processed_v4_temp = feature_manager.transform_with_standard_scaler(
    train_v0,
)
train_processed_v4_temp

Unnamed: 0,Geography,Gender,HasCrCard,IsActiveMember,Exited,CreditScore,Age,Tenure,Balance,NumOfProducts,EstimatedSalary
8769,France,Male,1,0,0,-0.297153,-0.759691,0.687591,-1.227884,-0.912204,-1.124006
3920,Germany,Male,1,0,0,-0.172801,0.386021,-0.695631,2.184041,-0.912204,0.269924
3983,France,Female,0,1,1,-0.255702,0.481497,-1.041437,-1.227884,-0.912204,1.279647
3944,France,Female,1,1,0,-0.400780,-0.759691,1.033397,0.664825,-0.912204,-0.797236
3279,Spain,Male,1,0,0,0.013728,-0.186835,0.687591,-1.227884,0.798045,-0.541349
...,...,...,...,...,...,...,...,...,...,...,...
3218,France,Male,1,0,0,0.127718,-0.282311,1.725009,-1.227884,0.798045,0.637880
4134,France,Male,1,1,0,0.283158,0.386021,-0.004020,0.413754,-0.912204,-0.351797
4453,Germany,Male,0,0,0,1.640673,-0.568739,1.033397,1.147123,-0.912204,1.734968
9346,France,Male,1,1,0,0.676941,0.195069,1.379203,-1.227884,-0.912204,0.234991


In [69]:
# Intermediate (scaled-only) test frame for v4, produced with the scaler
# fitted on the training data (no refit on the test split).
test_processed_v4_temp = feature_manager.transform_with_standard_scaler(
    test_v0,
)
test_processed_v4_temp

Unnamed: 0,Geography,Gender,HasCrCard,IsActiveMember,Exited,CreditScore,Age,Tenure,Balance,NumOfProducts,EstimatedSalary
4559,Germany,Female,0,0,1,-1.271248,2.200065,0.687591,0.908836,-0.912204,1.199189
2531,Germany,Male,1,1,0,-0.452594,0.767925,-0.349826,1.159484,-0.912204,-0.355271
7504,Spain,Female,1,1,1,0.034453,-0.664215,-1.041437,-1.227884,-0.912204,-0.834884
4879,Spain,Female,1,0,1,1.371242,0.481497,-0.695631,0.084482,-0.912204,1.124236
6418,Spain,Female,0,0,0,1.070724,-1.141595,1.033397,-1.227884,0.798045,0.384292
...,...,...,...,...,...,...,...,...,...,...,...
7997,Germany,Male,1,0,0,-1.924098,-1.428022,0.341786,1.274673,-0.912204,1.598214
2782,France,Male,0,0,0,-0.048448,-1.428022,1.033397,-1.227884,0.798045,-0.125528
8552,France,Female,1,1,0,-1.893010,-1.618974,-1.387243,-0.003634,-0.912204,1.657817
5562,Spain,Male,1,1,0,-0.317879,0.290545,1.379203,0.720974,-0.912204,-1.230689


In [70]:
# Fit the polynomial-feature step on the scaled v4 training frame only; the
# test frame is later transformed with this same fitted step. The trailing
# semicolon hides the returned object's repr, whose memory address changes on
# every run.
feature_manager.fit_poly(train_processed_v4_temp);

<feature_manager.FeatureEngineer at 0x126e82ea930>

In [74]:
# Add the polynomial terms to the scaled v4 training frame and display it.
train_processed_v4 = feature_manager.transform_with_poly(
    train_processed_v4_temp,
)
train_processed_v4

Unnamed: 0,Geography,Gender,HasCrCard,IsActiveMember,Exited,CreditScore,Age,Tenure,Balance,NumOfProducts,...,Tenure^2,Tenure Balance,Tenure NumOfProducts,Tenure EstimatedSalary,Balance^2,Balance NumOfProducts,Balance EstimatedSalary,NumOfProducts^2,NumOfProducts EstimatedSalary,EstimatedSalary^2
8769,France,Male,1,0,0,-0.297153,-0.759691,0.687591,-1.227884,-0.912204,...,0.472782,-0.844283,-0.627224,-0.772857,1.507699,1.120081,1.380148,0.832116,1.025322,1.263388
3920,Germany,Male,1,0,0,-0.172801,0.386021,-0.695631,2.184041,-0.912204,...,0.483903,-1.519288,0.634558,-0.187768,4.770037,-1.992291,0.589525,0.832116,-0.246226,0.072859
3983,France,Female,0,1,1,-0.255702,0.481497,-1.041437,-1.227884,-0.912204,...,1.084591,1.278764,0.950003,-1.332672,1.507699,1.120081,-1.571258,0.832116,-1.167299,1.637496
3944,France,Female,1,1,0,-0.400780,-0.759691,1.033397,0.664825,-0.912204,...,1.067910,0.687028,-0.942669,-0.823861,0.441992,-0.606456,-0.530022,0.832116,0.727241,0.635585
3279,Spain,Male,1,0,0,0.013728,-0.186835,0.687591,-1.227884,0.798045,...,0.472782,-0.844283,0.548729,-0.372227,1.507699,-0.979906,0.664714,0.636876,-0.432021,0.293059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3218,France,Male,1,0,0,0.127718,-0.282311,1.725009,-1.227884,0.798045,...,2.975655,-2.118111,1.376634,1.100349,1.507699,-0.979906,-0.783243,0.636876,0.509057,0.406891
4134,France,Male,1,1,0,0.283158,0.386021,-0.004020,0.413754,-0.912204,...,0.000016,-0.001663,0.003667,0.001414,0.171192,-0.377428,-0.145557,0.832116,0.320911,0.123761
4453,Germany,Male,0,0,0,1.640673,-0.568739,1.033397,1.147123,-0.912204,...,1.067910,1.185433,-0.942669,1.792912,1.315890,-1.046410,1.990221,0.832116,-1.582645,3.010115
9346,France,Male,1,1,0,0.676941,0.195069,1.379203,-1.227884,-0.912204,...,1.902201,-1.693501,-1.258114,0.324100,1.507699,1.120081,-0.288541,0.832116,-0.214359,0.055221


In [75]:
# Add the polynomial terms to the scaled v4 test frame using the step fitted
# on the training frame (no refit on the test split).
test_processed_v4 = feature_manager.transform_with_poly(
    test_processed_v4_temp,
)
test_processed_v4

Unnamed: 0,Geography,Gender,HasCrCard,IsActiveMember,Exited,CreditScore,Age,Tenure,Balance,NumOfProducts,...,Tenure^2,Tenure Balance,Tenure NumOfProducts,Tenure EstimatedSalary,Balance^2,Balance NumOfProducts,Balance EstimatedSalary,NumOfProducts^2,NumOfProducts EstimatedSalary,EstimatedSalary^2
4559,Germany,Female,0,0,1,-1.271248,2.200065,0.687591,0.908836,-0.912204,...,0.472782,0.624908,-0.627224,0.824552,0.825983,-0.829044,1.089866,0.832116,-1.093905,1.438054
2531,Germany,Male,1,1,0,-0.452594,0.767925,-0.349826,1.159484,-0.912204,...,0.122378,-0.405617,0.319112,0.124283,1.344402,-1.057685,-0.411931,0.832116,0.324079,0.126217
7504,Spain,Female,1,1,1,0.034453,-0.664215,-1.041437,-1.227884,-0.912204,...,1.084591,1.278764,0.950003,0.869479,1.507699,1.120081,1.025141,0.832116,0.761585,0.697032
4879,Spain,Female,1,0,1,1.371242,0.481497,-0.695631,0.084482,-0.912204,...,0.483903,-0.058768,0.634558,-0.782054,0.007137,-0.077065,0.094978,0.832116,-1.025532,1.263906
6418,Spain,Female,0,0,0,1.070724,-1.141595,1.033397,-1.227884,0.798045,...,1.067910,-1.268892,0.824697,0.397126,1.507699,-0.979906,-0.471866,0.636876,0.306682,0.147680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7997,Germany,Male,1,0,0,-1.924098,-1.428022,0.341786,1.274673,-0.912204,...,0.116817,0.435665,-0.311778,0.546247,1.624790,-1.162761,2.037199,0.832116,-1.457897,2.554288
2782,France,Male,0,0,0,-0.048448,-1.428022,1.033397,-1.227884,0.798045,...,1.067910,-1.268892,0.824697,-0.129720,1.507699,-0.979906,0.154134,0.636876,-0.100177,0.015757
8552,France,Female,1,1,0,-1.893010,-1.618974,-1.387243,-0.003634,-0.912204,...,1.924443,0.005041,1.265448,-2.299795,0.000013,0.003315,-0.006024,0.832116,-1.512267,2.748358
5562,Spain,Male,1,1,0,-0.317879,0.290545,1.379203,0.720974,-0.912204,...,1.902201,0.994370,-1.258114,-1.697370,0.519804,-0.657676,-0.887295,0.832116,1.122639,1.514595


In [76]:
# Persist the scaled + polynomial train/test frames (categorical columns left
# un-encoded) as "v4_normalizacao_plus_poly".
data_manager.save_processed_data(
    train_processed_v4,
    test_processed_v4,
    feature_set_name="v4_normalizacao_plus_poly",
)

 Dados salvos em: D:\mba\Data Science e Analytics (USP-Esalq)\99 - TCC\Projeto\notebooks\..\data\processed\v4_normalizacao_plus_poly
