In [5]:
# Random Forest with modified parameters and without SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# Read the training set from disk
train = pd.read_csv("../data/train.csv")

# Separate the target column from the features ("id" is not used as a feature)
y = train["target"]
X = train.drop(columns=["id", "target"])

# Hold out 20% of the rows for evaluation; the split is stratified on the
# target and seeded so it is reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [8]:
# Configure the forest with the modified hyper-parameters and fit it on the
# training split in one step (fit() returns the fitted estimator itself).
# class_weight='balanced' is the imbalance handling here; no SMOTE resampling.
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    min_samples_leaf=10,
    class_weight='balanced',
    n_jobs=-1,
    random_state=42,
).fit(X_train, y_train)

# Hard class labels for the held-out split
y_pred = rf_model.predict(X_test)

# Evaluation on the held-out split: each heading is followed by its metric
# on the next line (sep="\n" reproduces the two-print layout)
print("📊 Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

print("\n📄 Classification Report:", classification_report(y_test, y_pred), sep="\n")

# ROC-AUC is computed from the second predict_proba column rather than from
# the hard labels (assumes that column is the positive class — TODO confirm)
print(
    "\n🎯 ROC-AUC Score:",
    roc_auc_score(y_test, rf_model.predict_proba(X_test)[:, 1]),
)


📊 Confusion Matrix:
[[83779 30925]
 [ 2471  1868]]

📄 Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.73      0.83    114704
           1       0.06      0.43      0.10      4339

    accuracy                           0.72    119043
   macro avg       0.51      0.58      0.47    119043
weighted avg       0.94      0.72      0.81    119043


🎯 ROC-AUC Score: 0.6230476919443724
