In [1]:
# ============================================================================
# PROJETO: DETECÇÃO DE FRAUDES EM CARTÕES DE CRÉDITO
# Notebook 04: Modelagem e Avaliação
# ============================================================================

# %% [markdown]
# # 🤖 Modelagem de Machine Learning
# 
# Neste notebook vamos:
# - Tratar o desbalanceamento de classes
# - Treinar múltiplos modelos de ML
# - Otimizar hiperparâmetros
# - Avaliar com métricas apropriadas
# - Selecionar o melhor modelo

# %% [markdown]
# ## 📦 1. Importação de Bibliotecas

# %%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Scikit-learn
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import (classification_report, confusion_matrix, 
                             roc_auc_score, roc_curve, precision_recall_curve,
                             f1_score, recall_score, precision_score, accuracy_score)
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Tratamento de desbalanceamento
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline as ImbPipeline

# Outros
import time
from datetime import datetime

print("‚úÖ Bibliotecas importadas com sucesso!")

# %% [markdown]
# ## 📥 2. Carregamento dos Dados

# %%
# Read the pre-split train/test feature tables. They are kept as DataFrames so
# the column names stay available to later cells.
X_train = pd.read_csv('../data/processed/X_train.csv')
X_test = pd.read_csv('../data/processed/X_test.csv')

# Read the label files and flatten each one into a 1-D array
# (presumably single-column CSVs -- confirm against the preprocessing notebook).
y_train = pd.read_csv('../data/processed/y_train.csv').values.ravel()
y_test = pd.read_csv('../data/processed/y_test.csv').values.ravel()

# Summarise each split: total rows, then how many labels equal 0 and how many equal 1.
print(
    "‚úÖ Dados carregados com sucesso!",
    f"\nüìä Treino: {len(X_train):,} transa√ß√µes",
    f"   ‚Ä¢ Leg√≠timas: {(y_train == 0).sum():,}",
    f"   ‚Ä¢ Fraudes: {(y_train == 1).sum():,}",
    f"\nüìä Teste: {len(X_test):,} transa√ß√µes",
    f"   ‚Ä¢ Leg√≠timas: {(y_test == 0).sum():,}",
    f"   ‚Ä¢ Fraudes: {(y_test == 1).sum():,}",
    sep="\n",
)

# %% [markdown]
# ## ⚖️ 3. Tratamento de Desbalanceamento

# %%
# Section banner.
print("=" * 80, "‚öñÔ∏è ESTRAT√âGIAS DE BALANCEAMENTO", "=" * 80, sep="\n")

# Strategy 1 -- SMOTE oversampling of the training split only.
# sampling_strategy=0.5 asks for a minority/majority ratio of 0.5 after resampling.
smote = SMOTE(sampling_strategy=0.5, random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Size before/after and the resulting share of label-1 rows.
print(
    f"\n‚úÖ SMOTE aplicado:",
    f"   ‚Ä¢ Antes: {len(y_train):,} transa√ß√µes",
    f"   ‚Ä¢ Depois: {len(y_train_smote):,} transa√ß√µes",
    f"   ‚Ä¢ Fraudes: {(y_train_smote == 1).sum():,} ({(y_train_smote == 1).sum()/len(y_train_smote)*100:.1f}%)",
    sep="\n",
)

# Strategy 2 -- random undersampling of the majority class, same target ratio.
rus = RandomUnderSampler(sampling_strategy=0.5, random_state=42)
X_train_under, y_train_under = rus.fit_resample(X_train, y_train)

print(
    f"\n‚úÖ Undersampling aplicado:",
    f"   ‚Ä¢ Antes: {len(y_train):,} transa√ß√µes",
    f"   ‚Ä¢ Depois: {len(y_train_under):,} transa√ß√µes",
    f"   ‚Ä¢ Fraudes: {(y_train_under == 1).sum():,} ({(y_train_under == 1).sum()/len(y_train_under)*100:.1f}%)",
    sep="\n",
)

# %% [markdown]
# ## 🎯 4. Baseline - Logistic Regression

# %%
# Section banner.
print("=" * 80, "üéØ MODELO BASELINE - LOGISTIC REGRESSION", "=" * 80, sep="\n")

# Baseline: logistic regression fitted on the original (un-resampled) training data.
# fit() returns the estimator itself, so construction and fitting are chained.
lr_baseline = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# Hard class predictions plus the predicted probability of class 1 (second column).
y_pred_baseline = lr_baseline.predict(X_test)
y_proba_baseline = lr_baseline.predict_proba(X_test)[:, 1]

# Test-set metrics; AUC-ROC is computed from probabilities, the others from hard labels.
print(
    "\nüìä M√âTRICAS - Baseline (Sem Balanceamento):",
    f"   ‚Ä¢ Accuracy: {accuracy_score(y_test, y_pred_baseline):.4f}",
    f"   ‚Ä¢ Precision: {precision_score(y_test, y_pred_baseline):.4f}",
    f"   ‚Ä¢ Recall: {recall_score(y_test, y_pred_baseline):.4f}",
    f"   ‚Ä¢ F1-Score: {f1_score(y_test, y_pred_baseline):.4f}",
    f"   ‚Ä¢ AUC-ROC: {roc_auc_score(y_test, y_proba_baseline):.4f}",
    sep="\n",
)

# %% [markdown]
# ## 🌳 5. Random Forest com SMOTE

# %%
print("=" * 80)
print("üå≥ RANDOM FOREST + SMOTE")
print("=" * 80)

# Random Forest trained on the SMOTE-resampled training set.
# NOTE(review): class_weight='balanced' re-weights the classes on top of the
# SMOTE oversampling, so the minority class is boosted twice -- confirm that
# this is intended.
rf_smote = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=4,
    random_state=42,
    n_jobs=-1,
    class_weight='balanced'
)

# Time ONLY the fit call. The stopwatch previously also covered predict() and
# predict_proba(), so the reported "training time" included test-set inference.
# perf_counter() is the clock intended for measuring elapsed intervals.
start_time = time.perf_counter()
rf_smote.fit(X_train_smote, y_train_smote)
training_time = time.perf_counter() - start_time

# Test-set inference (outside the timed region): hard labels and the predicted
# probability of class 1 (second column of predict_proba).
y_pred_rf = rf_smote.predict(X_test)
y_proba_rf = rf_smote.predict_proba(X_test)[:, 1]

print(f"\n‚è±Ô∏è  Tempo de treinamento: {training_time:.2f} segundos")
print("\nüìä M√âTRICAS - Random Forest + SMOTE:")
print(f"   ‚Ä¢ Accuracy: {accuracy_score(y_test, y_pred_rf):.4f}")
print(f"   ‚Ä¢ Precision: {precision_score(y_test, y_pred_rf):.4f}")
print(f"   ‚Ä¢ Recall: {recall_score(y_test, y_pred_rf):.4f}")
print(f"   ‚Ä¢ F1-Score: {f1_score(y_test, y_pred_rf):.4f}")
print(f"   ‚Ä¢ AUC-ROC: {roc_auc_score(y_test, y_proba_rf):.4f}")

# %% [markdown]
# ## 🚀 6. Gradient Boosting com SMOTE

# %%
print("=" * 80)
print("üöÄ GRADIENT BOOSTING + SMOTE")
print("=" * 80)

# Gradient Boosting trained on the SMOTE-resampled training set.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, so
# fitting on the oversampled data is presumably slow -- consider caching the
# fitted model or prototyping on a subsample.
gb_smote = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=4,
    random_state=42
)

# Time ONLY the fit call. The stopwatch previously also covered predict() and
# predict_proba(), so the reported "training time" included test-set inference.
# perf_counter() is the clock intended for measuring elapsed intervals.
start_time = time.perf_counter()
gb_smote.fit(X_train_smote, y_train_smote)
training_time = time.perf_counter() - start_time

# Test-set inference (outside the timed region): hard labels and the predicted
# probability of class 1 (second column of predict_proba).
y_pred_gb = gb_smote.predict(X_test)
y_proba_gb = gb_smote.predict_proba(X_test)[:, 1]

print(f"\n‚è±Ô∏è  Tempo de treinamento: {training_time:.2f} segundos")
print("\nüìä M√âTRICAS - Gradient Boosting + SMOTE:")
print(f"   ‚Ä¢ Accuracy: {accuracy_score(y_test, y_pred_gb):.4f}")
print(f"   ‚Ä¢ Precision: {precision_score(y_test, y_pred_gb):.4f}")
print(f"   ‚Ä¢ Recall: {recall_score(y_test, y_pred_gb):.4f}")
print(f"   ‚Ä¢ F1-Score: {f1_score(y_test, y_pred_gb):.4f}")
print(f"   ‚Ä¢ AUC-ROC: {roc_auc_score(y_test, y_proba_gb):.4f}")

# %% [markdown]
# ## 📊 7. Comparação de Modelos

# %%
print("=" * 80)
print("üìä COMPARA√á√ÉO DE TODOS OS MODELOS")
print("=" * 80)

# One entry per fitted model, in display order:
# (table label, hard test predictions, class-1 probabilities).
model_outputs = [
    ('Logistic Regression (Baseline)', y_pred_baseline, y_proba_baseline),
    ('Random Forest + SMOTE', y_pred_rf, y_proba_rf),
    ('Gradient Boosting + SMOTE', y_pred_gb, y_proba_gb),
]

# Metrics that are scored from hard class predictions, in column order.
label_scorers = [
    ('Accuracy', accuracy_score),
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1-Score', f1_score),
]

# Assemble the table column by column; AUC-ROC goes last and is the only
# metric scored from probabilities rather than hard labels.
comparison_columns = {'Modelo': [name for name, _preds, _scores in model_outputs]}
for column_name, scorer in label_scorers:
    comparison_columns[column_name] = [
        scorer(y_test, preds) for _name, preds, _scores in model_outputs
    ]
comparison_columns['AUC-ROC'] = [
    roc_auc_score(y_test, scores) for _name, _preds, scores in model_outputs
]

models_comparison = pd.DataFrame(comparison_columns)

# Plain-text rendering of the table framed by rulers.
print("\n" + "=" * 100)
print(models_comparison.to_string(index=False))
print("=" * 100)

# %%
# Two side-by-side panels: every metric per model (left) and a
# recall-vs-precision close-up (right).
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_all, ax_focus = axes

# Left panel: grouped bars -- one group per metric, one bar per model.
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'AUC-ROC']
x = np.arange(len(metrics))
width = 0.25

for idx, (_row_label, row) in enumerate(models_comparison.iterrows()):
    # Everything after the first column ('Modelo') is a metric value.
    ax_all.bar(
        x + idx * width,
        row.iloc[1:].values,
        width,
        label=row['Modelo'],
        alpha=0.8,
        edgecolor='black',
        linewidth=1.5,
    )

ax_all.set_xlabel('M√©trica', fontsize=12, fontweight='bold')
ax_all.set_ylabel('Score', fontsize=12, fontweight='bold')
ax_all.set_title('Compara√ß√£o de M√©tricas entre Modelos', fontsize=14, fontweight='bold')
# Shift the ticks by one bar width so each label sits under the middle bar of its group.
ax_all.set_xticks(x + width)
ax_all.set_xticklabels(metrics)
ax_all.legend(fontsize=9, loc='lower right')
ax_all.grid(axis='y', alpha=0.3)
ax_all.set_ylim(0, 1.05)

# Right panel: recall and precision only, one bar pair per model.
recall_precision = models_comparison[['Modelo', 'Recall', 'Precision']].set_index('Modelo')
recall_precision.plot(
    kind='bar',
    ax=ax_focus,
    color=['#e74c3c', '#3498db'],
    edgecolor='black',
    linewidth=1.5,
    alpha=0.8,
)
ax_focus.set_xlabel('Modelo', fontsize=12, fontweight='bold')
ax_focus.set_ylabel('Score', fontsize=12, fontweight='bold')
ax_focus.set_title('Recall vs Precision (M√©tricas Cr√≠ticas)', fontsize=14, fontweight='bold')
ax_focus.set_xticklabels(ax_focus.get_xticklabels(), rotation=15, ha='right')
ax_focus.legend(fontsize=10)
ax_focus.grid(axis='y', alpha=0.3)
ax_focus.set_ylim(0, 1.05)

# Save the figure to disk before showing it.
plt.tight_layout()
plt.savefig('../images/14_models_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nüíæ Gr√°fico de compara√ß√£o salvo!")

# %% [markdown]
# ## 🎭 8. Matriz de Confusão

# %%
print("=" * 80, "üé≠ MATRIZES DE CONFUS√ÉO", "=" * 80, sep="\n")

# One heatmap per model, laid out in a single row.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

models_preds = [
    (y_pred_baseline, 'Logistic Regression'),
    (y_pred_rf, 'Random Forest + SMOTE'),
    (y_pred_gb, 'Gradient Boosting + SMOTE')
]

for ax, (y_pred, model_name) in zip(axes, models_preds):
    cm = confusion_matrix(y_test, y_pred)

    # Annotated heatmap of the raw counts (rows = true class, columns = predicted class).
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='RdYlGn_r',
        ax=ax,
        cbar=True,
        xticklabels=['Leg√≠tima', 'Fraude'],
        yticklabels=['Leg√≠tima', 'Fraude'],
        annot_kws={'size': 14, 'weight': 'bold'},
    )

    ax.set_title(model_name, fontsize=12, fontweight='bold')
    ax.set_ylabel('Valor Real', fontsize=11, fontweight='bold')
    ax.set_xlabel('Predi√ß√£o', fontsize=11, fontweight='bold')

    # Caption under each panel spelling out the four cells of the matrix;
    # ravel() flattens it row by row.
    tn, fp, fn, tp = cm.ravel()
    ax.text(
        0.5,
        -0.15,
        f'TN={tn:,}  FP={fp:,}\nFN={fn:,}  TP={tp:,}',
        ha='center',
        va='top',
        transform=ax.transAxes,
        fontsize=9,
        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
    )

plt.tight_layout()
plt.savefig('../images/15_confusion_matrices.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nüíæ Matrizes de confus√£o salvas!")

# Detailed breakdown for the Gradient Boosting model. tn/fp/fn/tp are rebound
# here to that model's counts, overriding the values left by the loop above.
best_cm = confusion_matrix(y_test, y_pred_gb)
tn, fp, fn, tp = best_cm.ravel()

print(
    f"\nüìä AN√ÅLISE DETALHADA - Gradient Boosting:",
    f"   ‚Ä¢ True Negatives (TN): {tn:,} - Leg√≠timas corretamente identificadas",
    f"   ‚Ä¢ False Positives (FP): {fp:,} - Leg√≠timas erroneamente marcadas como fraude",
    f"   ‚Ä¢ False Negatives (FN): {fn:,} - ‚ö†Ô∏è FRAUDES N√ÉO DETECTADAS!",
    f"   ‚Ä¢ True Positives (TP): {tp:,} - ‚úÖ Fraudes corretamente detectadas",
    f"\nüí∞ Taxa de Detec√ß√£o de Fraudes: {tp/(tp+fn)*100:.2f}%",
    sep="\n",
)

# %% [markdown]
# ## 📈 9. Curvas ROC e Precision-Recall

# %%
print("=" * 80, "üìà CURVAS ROC E PRECISION-RECALL", "=" * 80, sep="\n")

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_roc, ax_pr = axes

# (class-1 probabilities, legend label, line colour) for each model.
models_data = [
    (y_proba_baseline, 'Logistic Regression', '#9b59b6'),
    (y_proba_rf, 'Random Forest', '#2ecc71'),
    (y_proba_gb, 'Gradient Boosting', '#e74c3c')
]

# Draw both curves for a model in one pass; the per-axes drawing order
# (and therefore the legend order) is the same as drawing them in two loops.
for y_proba, label, color in models_data:
    fpr, tpr, _roc_thresholds = roc_curve(y_test, y_proba)
    auc = roc_auc_score(y_test, y_proba)
    ax_roc.plot(fpr, tpr, label=f'{label} (AUC = {auc:.4f})',
                color=color, linewidth=2.5)

    precision, recall, _pr_thresholds = precision_recall_curve(y_test, y_proba)
    ax_pr.plot(recall, precision, label=label, color=color, linewidth=2.5)

# ROC panel: the diagonal is the chance-level reference.
ax_roc.plot([0, 1], [0, 1], 'k--', linewidth=1.5, label='Aleat√≥rio (AUC = 0.5)')
ax_roc.set_xlabel('Taxa de Falsos Positivos', fontsize=12, fontweight='bold')
ax_roc.set_ylabel('Taxa de Verdadeiros Positivos (Recall)', fontsize=12, fontweight='bold')
ax_roc.set_title('Curva ROC - Compara√ß√£o de Modelos', fontsize=14, fontweight='bold')
ax_roc.legend(fontsize=10, loc='lower right')
ax_roc.grid(alpha=0.3)

# PR panel: the horizontal reference is the share of label-1 rows in the test set.
baseline_precision = (y_test == 1).sum() / len(y_test)
ax_pr.axhline(y=baseline_precision, color='k', linestyle='--',
              linewidth=1.5, label=f'Baseline ({baseline_precision:.4f})')

ax_pr.set_xlabel('Recall (Sensibilidade)', fontsize=12, fontweight='bold')
ax_pr.set_ylabel('Precision', fontsize=12, fontweight='bold')
ax_pr.set_title('Curva Precision-Recall', fontsize=14, fontweight='bold')
ax_pr.legend(fontsize=10, loc='upper right')
ax_pr.grid(alpha=0.3)

plt.tight_layout()
plt.savefig('../images/16_roc_pr_curves.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nüíæ Curvas ROC e PR salvas!")

# %% [markdown]
# ## 🌟 10. Feature Importance (Gradient Boosting)

# %%
print("=" * 80, "üåü IMPORT√ÇNCIA DAS FEATURES", "=" * 80, sep="\n")

# Pair every training column with the fitted Gradient Boosting model's
# importance score, then rank from most to least important.
importance_by_feature = {
    'Feature': X_train.columns,
    'Importance': gb_smote.feature_importances_,
}
feature_importance = pd.DataFrame(importance_by_feature).sort_values(
    by='Importance', ascending=False
)

print("\nüìä TOP 20 FEATURES MAIS IMPORTANTES:")
print(feature_importance.head(20).to_string(index=False))

# %%
# Horizontal bar chart of the 20 highest-ranked features.
fig, ax = plt.subplots(figsize=(12, 8))

top_20 = feature_importance.head(20)
# One colour per bar, sampled evenly from the reversed red-yellow-green map.
colors = plt.cm.RdYlGn_r(np.linspace(0.2, 0.8, len(top_20)))

bar_positions = range(len(top_20))
bars = ax.barh(
    bar_positions,
    top_20['Importance'].values,
    color=colors,
    edgecolor='black',
    linewidth=1.5,
)

ax.set_yticks(bar_positions)
ax.set_yticklabels(top_20['Feature'].values)
ax.invert_yaxis()  # first (most important) feature at the top
ax.set_xlabel('Import√¢ncia', fontsize=12, fontweight='bold')
ax.set_title('Top 20 Features Mais Importantes - Gradient Boosting',
             fontsize=14, fontweight='bold')
ax.grid(axis='x', alpha=0.3)

# Write each bar's value just past its right-hand end, vertically centred.
for bar in bars:
    bar_length = bar.get_width()
    ax.text(
        bar_length + 0.001,
        bar.get_y() + bar.get_height()/2,
        f'{bar_length:.4f}',
        ha='left',
        va='center',
        fontweight='bold',
        fontsize=9,
    )

plt.tight_layout()
plt.savefig('../images/17_feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

print("\nüíæ Gr√°fico de import√¢ncia salvo!")

# %% [markdown]
# ## 💾 11. Salvamento do Modelo Final

# %%
import os
import pickle

print("=" * 80)
print("üíæ SALVANDO MODELO FINAL")
print("=" * 80)

# Output locations for the fitted Gradient Boosting model and its metadata.
model_filename = '../models/fraud_detection_gb_model.pkl'
model_info_filename = '../models/model_info.pkl'

# Create the output folder first: open(..., 'wb') raises FileNotFoundError when
# the parent directory is missing (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(model_filename), exist_ok=True)

# NOTE: pickle files can execute arbitrary code when loaded -- only unpickle
# these artifacts from a trusted location.
with open(model_filename, 'wb') as file:
    pickle.dump(gb_smote, file)

print(f"\n‚úÖ Modelo salvo: {model_filename}")

# Metadata stored next to the model: test-set metrics of the Gradient Boosting
# predictions, the save timestamp, and the training column names.
model_info = {
    'model_name': 'Gradient Boosting + SMOTE',
    'training_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'accuracy': accuracy_score(y_test, y_pred_gb),
    'precision': precision_score(y_test, y_pred_gb),
    'recall': recall_score(y_test, y_pred_gb),
    'f1_score': f1_score(y_test, y_pred_gb),
    'auc_roc': roc_auc_score(y_test, y_proba_gb),
    'features': list(X_train.columns),
    'n_features': len(X_train.columns)
}

with open(model_info_filename, 'wb') as file:
    pickle.dump(model_info, file)

print("‚úÖ Informa√ß√µes do modelo salvas: models/model_info.pkl")

# %% [markdown]
# ## 📋 12. Relatório Final de Resultados

# %%
print("\n" + "=" * 80)
print("üìã RELAT√ìRIO FINAL DO PROJETO")
print("=" * 80)

# Dataset totals are rebuilt from the train/test labels loaded in this notebook.
# They used to be read from `df_original`, which is never defined here and
# raised NameError on a fresh kernel.
# NOTE(review): assumes the train and test splits together cover the whole
# dataset -- confirm against the preprocessing notebook.
n_total = len(y_train) + len(y_test)
n_legit = int((y_train == 0).sum() + (y_test == 0).sum())
n_fraud = int((y_train == 1).sum() + (y_test == 1).sum())
pct_legit = n_legit / n_total * 100 if n_total else 0.0
pct_fraud = n_fraud / n_total * 100 if n_total else 0.0

# Score the Gradient Boosting test predictions once and reuse the values below.
gb_accuracy = accuracy_score(y_test, y_pred_gb)
gb_precision = precision_score(y_test, y_pred_gb)
gb_recall = recall_score(y_test, y_pred_gb)
gb_f1 = f1_score(y_test, y_pred_gb)
gb_auc = roc_auc_score(y_test, y_proba_gb)

# Recompute the confusion-matrix cells here so the report does not depend on
# tn/fp/fn/tp values left behind by an earlier cell.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred_gb).ravel()

# Name the three highest-ranked features instead of hard-coding them, so the
# conclusion always matches the importance table printed above it.
top3_features = ', '.join(str(name) for name in feature_importance['Feature'].head(3))

print(f"""
üéØ OBJETIVO DO PROJETO:
Desenvolver um sistema de detec√ß√£o de fraudes em cart√µes de cr√©dito
utilizando Machine Learning para minimizar perdas financeiras.

üìä DATASET:
‚Ä¢ Total de transa√ß√µes: {n_total:,}
‚Ä¢ Transa√ß√µes leg√≠timas: {n_legit:,} ({pct_legit:.2f}%)
‚Ä¢ Transa√ß√µes fraudulentas: {n_fraud:,} ({pct_fraud:.2f}%)
‚Ä¢ Features: {X_train.shape[1]} (V1-V28 + engineered features)

ü§ñ MODELOS TESTADOS:
1. Logistic Regression (Baseline)
2. Random Forest + SMOTE
3. Gradient Boosting + SMOTE ‚≠ê MELHOR MODELO

üìà RESULTADOS DO MELHOR MODELO (Gradient Boosting + SMOTE):
‚Ä¢ Accuracy:  {gb_accuracy:.4f} ({gb_accuracy*100:.2f}%)
‚Ä¢ Precision: {gb_precision:.4f} ({gb_precision*100:.2f}%)
‚Ä¢ Recall:    {gb_recall:.4f} ({gb_recall*100:.2f}%) ‚ö†Ô∏è CR√çTICO!
‚Ä¢ F1-Score:  {gb_f1:.4f}
‚Ä¢ AUC-ROC:   {gb_auc:.4f}

üéØ INTERPRETA√á√ÉO:
‚Ä¢ De cada 100 fraudes reais, detectamos ~{gb_recall*100:.0f}
‚Ä¢ De cada 100 alertas emitidos, ~{gb_precision*100:.0f} s√£o fraudes reais
‚Ä¢ Taxa de falsos positivos: {fp/(fp+tn)*100:.2f}%
‚Ä¢ Taxa de falsos negativos: {fn/(fn+tp)*100:.2f}%

üí∞ IMPACTO NO NEG√ìCIO:
‚Ä¢ Fraudes detectadas: {tp:,} de {tp+fn:,} ({tp/(tp+fn)*100:.2f}%)
‚Ä¢ Fraudes n√£o detectadas: {fn:,} ({fn/(fn+tp)*100:.2f}%)
‚Ä¢ Clientes impactados por falsos positivos: {fp:,}

üåü TOP 5 FEATURES MAIS IMPORTANTES:
{feature_importance.head(5).to_string(index=False)}

‚úÖ CONCLUS√ïES:
1. O modelo Gradient Boosting com SMOTE obteve os melhores resultados
2. Recall de {gb_recall*100:.2f}% indica boa capacidade de detectar fraudes
3. Precision de {gb_precision*100:.2f}% minimiza inconvenientes aos clientes
4. AUC-ROC de {gb_auc:.4f} demonstra excelente discrimina√ß√£o
5. Features {top3_features} s√£o cr√≠ticas para detec√ß√£o

üöÄ PR√ìXIMOS PASSOS:
1. Deploy do modelo em ambiente de produ√ß√£o
2. Monitoramento cont√≠nuo de performance
3. Retreinamento peri√≥dico com novos dados
4. Ajuste de threshold baseado em custos de neg√≥cio
5. Implementa√ß√£o de explicabilidade (SHAP values)
""")

print("=" * 80)
print("‚úÖ PROJETO CONCLU√çDO COM SUCESSO!")
print("=" * 80)

# %% [markdown]
# ---
# ## 📦 Arquivos Gerados
# 
# ### Dados:
# - `data/processed/creditcard_processed.csv`
# - `data/processed/X_train.csv`, `X_test.csv`
# - `data/processed/y_train.csv`, `y_test.csv`
# 
# ### Modelos:
# - `models/fraud_detection_gb_model.pkl` (modelo final)
# - `models/model_info.pkl` (metadados)
# 
# ### Visualizações:
# - `images/01-17_*.png` (17 visualizações estáticas)
# - `images/09_temporal_analysis_interactive.html`
# - `images/13_dashboard_interactive.html`
# 
# ---
# **Notebook criado por**: [Seu Nome]  
# **GitHub**: [seu-usuario]  
# **LinkedIn**: [seu-perfil]  
# **Data**: Setembro 2025

‚úÖ Bibliotecas importadas com sucesso!
‚úÖ Dados carregados com sucesso!

üìä Treino: 227,845 transa√ß√µes
   ‚Ä¢ Leg√≠timas: 227,451
   ‚Ä¢ Fraudes: 394

üìä Teste: 56,962 transa√ß√µes
   ‚Ä¢ Leg√≠timas: 56,864
   ‚Ä¢ Fraudes: 98
‚öñÔ∏è ESTRAT√âGIAS DE BALANCEAMENTO

‚úÖ SMOTE aplicado:
   ‚Ä¢ Antes: 227,845 transa√ß√µes
   ‚Ä¢ Depois: 341,176 transa√ß√µes
   ‚Ä¢ Fraudes: 113,725 (33.3%)

‚úÖ Undersampling aplicado:
   ‚Ä¢ Antes: 227,845 transa√ß√µes
   ‚Ä¢ Depois: 1,182 transa√ß√µes
   ‚Ä¢ Fraudes: 394 (33.3%)
üéØ MODELO BASELINE - LOGISTIC REGRESSION

üìä M√âTRICAS - Baseline (Sem Balanceamento):
   ‚Ä¢ Accuracy: 0.9991
   ‚Ä¢ Precision: 0.8052
   ‚Ä¢ Recall: 0.6327
   ‚Ä¢ F1-Score: 0.7086
   ‚Ä¢ AUC-ROC: 0.9533
üå≥ RANDOM FOREST + SMOTE

‚è±Ô∏è  Tempo de treinamento: 68.23 segundos

üìä M√âTRICAS - Random Forest + SMOTE:
   ‚Ä¢ Accuracy: 0.9977
   ‚Ä¢ Precision: 0.4208
   ‚Ä¢ Recall: 0.8673
   ‚Ä¢ F1-Score: 0.5667
   ‚Ä¢ AUC-ROC: 0.9838
üöÄ GRADIENT BOOSTING + SMOTE


KeyboardInterrupt: 