In [2]:
# XGBoost with parameter tuning and scale_pos_weight 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
import numpy as np

# Load the training data (path is relative to the notebook's working directory)
df = pd.read_csv("../data/train.csv")

# Remove ID (presumably a row identifier with no predictive value -- confirm)
df = df.drop(columns=['id'])

# Divide the features and the target
X = df.drop("target", axis=1)
y = df["target"]

# Encode categorical (object-dtype) columns as integer codes, if any.
# NOTE(review): each encoder is fitted on the full frame before the split, so
# every category seen in the test rows is known at fit time; the fitted
# encoders are not kept, so new data cannot be transformed consistently later.
for col in X.select_dtypes(include='object').columns:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])

# Split first; stratify=y keeps the class ratio the same in both partitions
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# Calculate scale_pos_weight (useful for unbalanced datasets) as
# negatives / positives, using the TRAINING labels only so that no statistic
# of the held-out labels leaks into the model configuration.
n_negative = (y_train == 0).sum()
n_positive = (y_train == 1).sum()
if n_positive == 0:
    # Without this guard the division silently yields inf and the failure
    # only surfaces later, inside model training.
    raise ValueError("Cannot compute scale_pos_weight: no positive (target == 1) rows in the training split")
scale_pos_weight = n_negative / n_positive
print(f"scale_pos_weight: {scale_pos_weight:.2f}")


scale_pos_weight: 26.44


In [3]:
# Initialise the classifier with basic tuning.
# scale_pos_weight up-weights the positive class to counter class imbalance.
# `use_label_encoder` is deliberately not passed: the installed XGBoost does
# not recognise it and only emitted a "Parameters ... are not used" warning.
model = xgb.XGBClassifier(
    objective='binary:logistic',
    eval_metric='logloss',
    scale_pos_weight=scale_pos_weight,
    learning_rate=0.1,
    max_depth=5,
    n_estimators=100,
    random_state=42
)

# Fit on the training partition only
model.fit(X_train, y_train)

# Predict: hard class labels, plus the probability of the positive class
# (column 1 of predict_proba) for threshold-independent metrics
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Evaluation on the held-out partition
print("📊 Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\n📄 Classification Report:")
print(classification_report(y_test, y_pred))

# ROC-AUC is computed from the probabilities, not the thresholded labels
print(f"\n🎯 ROC-AUC Score: {roc_auc_score(y_test, y_pred_proba):.4f}")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


📊 Confusion Matrix:
[[75227 39477]
 [ 2014  2325]]

📄 Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.66      0.78    114704
           1       0.06      0.54      0.10      4339

    accuracy                           0.65    119043
   macro avg       0.51      0.60      0.44    119043
weighted avg       0.94      0.65      0.76    119043


🎯 ROC-AUC Score: 0.6351
