In [7]:
import pandas as pd
import joblib
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier

def carregar_dados_treino(caminho_x, caminho_y):
    """Load the training data from two CSV files.

    Args:
        caminho_x: Path of the CSV read into the ``X_train`` DataFrame.
        caminho_y: Path of the CSV whose values become ``y_train``.

    Returns:
        A ``(X_train, y_train)`` tuple: the first CSV as a pandas DataFrame
        and the second CSV's values flattened into a one-dimensional array.
    """
    atributos = pd.read_csv(caminho_x)
    tabela_rotulos = pd.read_csv(caminho_y)
    # Flatten the values so the result is a 1-D array rather than a 2-D
    # single-column block.
    rotulos = tabela_rotulos.values.ravel()
    return atributos, rotulos

def definir_modelos(random_state=42):
    """Return a dictionary with the configured (unfitted) models.

    Args:
        random_state: Seed forwarded to every estimator so that repeated
            runs use the same random state.

    Returns:
        A dict mapping a model name (``"RandomForest"``, ``"XGBoost"``,
        ``"LogisticRegression"``, ``"SVM"``) to its estimator. The logistic
        regression and SVM entries are pipelines that apply a
        ``RobustScaler`` before the classifier.
    """
    return {
        "RandomForest": RandomForestClassifier(
            n_estimators=100,
            max_depth=10,
            random_state=random_state,
            class_weight="balanced",
        ),
        # ``use_label_encoder`` is intentionally not passed: current XGBoost
        # releases no longer accept it and only emit a
        # "Parameters: { "use_label_encoder" } are not used." warning.
        "XGBoost": XGBClassifier(
            n_estimators=100,
            random_state=random_state,
            eval_metric='mlogloss',
        ),
        "LogisticRegression": Pipeline([
            ('scaler', RobustScaler()),
            # multi_class='multinomial' was removed from this configuration.
            ('model', LogisticRegression(class_weight='balanced', max_iter=1000, random_state=random_state))
        ]),
        "SVM": Pipeline([
            ('scaler', RobustScaler()),
            # probability=True is set so the fitted SVC exposes predict_proba.
            ('model', SVC(kernel='rbf', probability=True, class_weight='balanced', random_state=random_state))
        ])
    }

def salvar_modelo(modelo, nome_modelo):
    """Serialize *modelo* with joblib to ``../src/models/<nome_modelo>.pkl``.

    The destination directory is created if it does not exist yet, and a
    confirmation line with the output path is printed after the dump.

    Args:
        modelo: Object to persist.
        nome_modelo: Base name (without extension) of the output file.
    """
    destino = f"../src/models/{nome_modelo}.pkl"
    pasta_destino = os.path.dirname(destino)
    # exist_ok=True keeps repeated saves from failing once the folder exists.
    os.makedirs(pasta_destino, exist_ok=True)
    joblib.dump(modelo, destino)
    print(f"Modelo {nome_modelo} salvo em: {destino}")

def executar_pipeline_treinamento():
    """Fit every configured model on the processed training set and save it.

    Reads ``../data/processed/X_train.csv`` and
    ``../data/processed/y_train.csv``, then fits and persists each model
    returned by ``definir_modelos`` in turn, printing a header per model.
    """
    caminho_atributos = "../data/processed/X_train.csv"
    caminho_rotulos = "../data/processed/y_train.csv"
    dados_x, dados_y = carregar_dados_treino(caminho_atributos, caminho_rotulos)

    for nome, estimador in definir_modelos().items():
        print(f"\n--- Treinando {nome} ---")
        estimador.fit(dados_x, dados_y)
        # Persist right after fitting, before moving on to the next model.
        salvar_modelo(estimador, nome)


# Run the whole training pipeline: load the data, fit each model, save it.
executar_pipeline_treinamento()


--- Treinando RandomForest ---
Modelo RandomForest salvo em: ../src/models/RandomForest.pkl

--- Treinando XGBoost ---


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Modelo XGBoost salvo em: ../src/models/XGBoost.pkl

--- Treinando LogisticRegression ---
Modelo LogisticRegression salvo em: ../src/models/LogisticRegression.pkl

--- Treinando SVM ---
Modelo SVM salvo em: ../src/models/SVM.pkl
