In [1]:
import pandas as pd
import numpy as np
import warnings
import logging
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# 1. Silence warnings and XGBoost logging
# Install a blanket "ignore" filter for every Python warning category
# (this also hides scikit-learn warnings raised in this process).
warnings.simplefilter('ignore')

# Raise the threshold of the stdlib logger named 'xgboost' to ERROR.
# NOTE(review): xgboost may not emit its messages through the `logging`
# module at all - confirm that this line has any effect.
xgb_logger = logging.getLogger('xgboost')
xgb_logger.setLevel(logging.ERROR)

In [2]:
# 2. Data loading and preparation
df = pd.read_csv('../data/processed/biometria_final_pos.csv')

# Encode the 'cow_id' column as integer class labels; `le` keeps the fitted
# mapping so predictions can be translated back to the original ids.
le = LabelEncoder()
y = le.fit_transform(df['cow_id'])

# Group the feature columns by name prefix.
cols_geo = list(filter(lambda nome: nome.startswith('geo_'), df.columns))
cols_img_all = list(filter(lambda nome: nome.startswith('img_'), df.columns))
# Image columns whose name does not contain 'sift'.
cols_img_no_sift = list(filter(lambda nome: 'sift' not in nome, cols_img_all))

# Scenario A: 'geo_' columns + 'img_' columns without the 'sift' ones.
X_cenario_A = df[[*cols_geo, *cols_img_no_sift]]
# Scenario B: 'geo_' columns + every 'img_' column.
X_cenario_B = df[[*cols_geo, *cols_img_all]]

In [3]:
# 3. Cross-validation: 5 stratified, shuffled folds with a fixed seed
skf = StratifiedKFold(
    random_state=42,
    shuffle=True,
    n_splits=5,
)

# 4. Hyperparameter grids
# Keys carry the `classifier__` prefix so they address the pipeline step
# named 'classifier'.
params_xgb = dict(
    classifier__n_estimators=[200, 500],
    classifier__max_depth=[6, 10],
    classifier__learning_rate=[0.05, 0.1],
    # Single-valued entries: fixed settings rather than searched values.
    classifier__tree_method=['hist'],
    classifier__device=['cuda'],
    classifier__verbosity=[0],  # keeps the booster itself quiet
)

params_mlp = dict(
    classifier__hidden_layer_sizes=[(128, 64), (256, 128)],
    classifier__activation=['tanh', 'relu'],
    classifier__alpha=[0.0001, 0.01],
)

def _buscar_melhor_modelo(rotulo, classificador, grade, X, y, n_jobs):
    """Grid-search one scaler + classifier pipeline and print its best result.

    Args:
        rotulo: Model name shown in the printed summary line.
        classificador: Unfitted estimator used as the pipeline's
            ``'classifier'`` step.
        grade: Parameter grid whose keys use the ``classifier__`` prefix.
        X: Feature matrix.
        y: Encoded target labels.
        n_jobs: Number of parallel workers handed to ``GridSearchCV``.

    Returns:
        The fitted ``GridSearchCV`` instance.
    """
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('classifier', classificador),
    ])
    # `skf` is the module-level StratifiedKFold splitter shared by all searches.
    busca = GridSearchCV(pipeline, grade, cv=skf, scoring='accuracy', n_jobs=n_jobs)
    busca.fit(X, y)
    print(f"🏆 Melhor {rotulo}: {busca.best_score_:.2%} | Params: {busca.best_params_}")
    return busca


def treinar_hibrido_limpo(X, y, titulo):
    """Tune an XGBoost and an MLP classifier on ``X``/``y`` and print the winners.

    Prints a banner containing ``titulo``, then runs one accuracy-scored
    ``GridSearchCV`` per model (grids ``params_xgb`` and ``params_mlp``,
    folds from the module-level ``skf``) and prints each search's best
    cross-validated score together with the parameters that achieved it.

    Args:
        X: Feature matrix for the scenario being evaluated.
        y: Encoded target labels.
        titulo: Scenario title shown in the banner.

    Returns:
        None. Results are only printed.
    """
    print(f"\n{'='*20} {titulo} {'='*20}")

    # --- XGBoost ---
    # `params_xgb` requests device='cuda', so every fit targets the GPU.
    # Running the search in-process (n_jobs=1) avoids many worker processes
    # competing for the same device and its memory.
    _buscar_melhor_modelo(
        "XGBoost", XGBClassifier(random_state=42), params_xgb, X, y, n_jobs=1
    )

    # --- MLP ---
    # CPU-only estimator: spread the grid over all available cores.
    _buscar_melhor_modelo(
        "MLP", MLPClassifier(max_iter=1000, random_state=42), params_mlp, X, y, n_jobs=-1
    )

In [4]:
# 5. Execution: run the same model comparison on both feature scenarios
for X_cenario, titulo_cenario in (
    (X_cenario_A, "CENÁRIO A: Geometria + Pixels (Sem SIFT)"),
    (X_cenario_B, "CENÁRIO B: Geometria + Pixels + SIFT (Completo)"),
):
    treinar_hibrido_limpo(X_cenario, y, titulo_cenario)


🏆 Melhor XGBoost: 56.07% | Params: {'classifier__device': 'cuda', 'classifier__learning_rate': 0.1, 'classifier__max_depth': 6, 'classifier__n_estimators': 500, 'classifier__tree_method': 'hist', 'classifier__verbosity': 0}
🏆 Melhor MLP: 61.33% | Params: {'classifier__activation': 'tanh', 'classifier__alpha': 0.0001, 'classifier__hidden_layer_sizes': (256, 128)}

🏆 Melhor XGBoost: 56.13% | Params: {'classifier__device': 'cuda', 'classifier__learning_rate': 0.05, 'classifier__max_depth': 6, 'classifier__n_estimators': 500, 'classifier__tree_method': 'hist', 'classifier__verbosity': 0}
🏆 Melhor MLP: 61.80% | Params: {'classifier__activation': 'tanh', 'classifier__alpha': 0.0001, 'classifier__hidden_layer_sizes': (256, 128)}
