In [None]:
from pycaret.classification import *
import pandas as pd
import numpy as np

# Setup

In [None]:
# Location of the pre-selected feature table, relative to the notebook's working directory.
DATASET_PATH = 'Dados/dataset_selected.csv'

# Load the table and preview its first rows as the cell output.
data = pd.read_csv(DATASET_PATH)
data.head()

In [None]:
# Initialise the PyCaret classification experiment on `data`, predicting the 'label' column.
# Every optional preprocessing step below is switched off; its companion setting is
# kept alongside so the step can be enabled by flipping a single flag.
s = setup(
    data=data,
    target='label',
    session_id=123,
    profile=False,

    # --- Train / test split ---
    train_size=0.5,
    fix_imbalance=False,
    data_split_stratify=True,

    # === PREPROCESSING ===
    preprocess=True,

    # Polynomial features
    polynomial_features=False,
    polynomial_degree=2,

    # Multicollinearity
    remove_multicollinearity=False,
    multicollinearity_threshold=0.9,

    # Transformation: "applies the power transform to make data more Gaussian-like"
    transformation=False,
    transformation_method='yeo-johnson',  # 'yeo-johnson' or 'quantile'

    # Normalization
    normalize=False,
    normalize_method='zscore',  # 'zscore', 'minmax', 'maxabs', 'robust'

    # Feature selection
    feature_selection=False,
    n_features_to_select=3,
    feature_selection_method='classic',  # 'classic', 'univariate', 'sequential'
)

In [None]:
# Fetch the label vectors of both splits as transformed by the setup() pipeline.
# (A bare get_config() call used to precede these lines; without a variable name it
# only returns the list of available keys, and that result was discarded.)
y_test_transformed = get_config('y_test_transformed')
y_train_transformed = get_config('y_train_transformed')

# Occurrences per class in each split; np.bincount needs non-negative integer labels.
# NOTE(review): assumes the transformed target is integer-encoded — confirm.
train_count = np.bincount(y_train_transformed)
test_count = np.bincount(y_test_transformed)

# Share of each class within its split, in percent (kept unrounded).
train_prop = (train_count / train_count.sum()) * 100
test_prop = (test_count / test_count.sum()) * 100

# np.round(...).tolist() yields plain Python floats, so the list prints as
# [50.0, 50.0] instead of [np.float64(50.0), ...] under NumPy >= 2.
print(f"Contagem de Classes (treino): {train_count} ({np.round(train_prop, 3).tolist()}%)")
print(f"Contagem de Classes (teste): {test_count} ({np.round(test_prop, 3).tolist()}%)")

# Treinamento

In [None]:
# How many of the best-ranked estimators to keep from the comparison.
N_TOP_MODELS = 3

# Train and cross-validate the candidate estimators; with n_select > 1 the
# selected models come back as a list, whose first entry is taken as `best`.
top3 = compare_models(n_select=N_TOP_MODELS)
best = top3[0]

In [None]:
# Score every shortlisted model on the hold-out split. Each predict_model() call
# returns a prediction frame; collect all of them instead of letting each iteration
# overwrite the previous one.
holdout_preds = []
for model in top3:
    # `holdout_pred` is still rebound on each pass, so after the loop it holds the
    # last model's predictions exactly as before.
    holdout_pred = predict_model(model)
    holdout_preds.append(holdout_pred)

# Avaliação do Modelo

In [None]:
# Schematic of the preprocessing pipeline that feeds `best` (PyCaret 'pipeline' plot).
plot_model(best, plot = 'pipeline')

In [None]:
# Table of the hyperparameters of `best` (PyCaret 'parameter' plot).
plot_model(best, plot = 'parameter')

In [None]:
# Discrimination-threshold plot for `best`.
# NOTE(review): this plot is presumably only meaningful for a binary target — confirm 'label' is binary.
plot_model(best, plot = 'threshold')

In [None]:
# Confusion matrix of `best`.
plot_model(best, plot = 'confusion_matrix')

In [None]:
# Feature-importance plot for `best`.
# NOTE(review): `best` is chosen at run time; presumably this needs an estimator that
# exposes coefficients or feature importances — verify it works for the selected model.
plot_model(best, plot = 'feature')

In [None]:
# Recursive feature selection plot for `best`.
# NOTE(review): likely refits the model many times, so this cell may be slow — confirm run time.
plot_model(best, plot = 'rfe')

In [None]:
# Learning curve of `best`.
plot_model(best, plot = 'learning')

In [None]:
# Validation curve of `best` (PyCaret 'vc' plot).
plot_model(best, plot = 'vc')