In [None]:
# CatBoostClassifier: baseline model for the binary "strike" target.
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score

# === 1. Select features & target ===
# NOTE: `features` / `target` document the expected columns; this cell does not
# rebuild X / y from them.
features = [
    "jumlah burung pada titik x", "titik", "hour", "dayofweek", "is_weekend",
    "waktu_Dini Hari", "waktu_Malam", "waktu_Pagi", "waktu_Siang", "waktu_Sore",
    "cuaca_Cerah Berawan", "cuaca_Hujan", "cuaca_Mendung", "fase_Take Off",
]
target = "strike"

# X and y are expected to exist already (prepared by an earlier cell),
# e.g. X = df_model[features]; y = df_model[target]

# === 2. Train-test split ===
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Hold out part of the training data for early stopping. Using the test set as
# eval_set would let the test data pick the number of boosting iterations and
# make the reported test metrics optimistic.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, stratify=y_train, random_state=42
)

# === 3. Categorical columns ===
# The inputs are already one-hot encoded / numeric, so no column is handed to
# CatBoost's built-in categorical handling.
cat_features = []

# === 4. Compute scale_pos_weight ===
# Count the classes explicitly: value_counts() is ordered by frequency, so
# unpacking it only gives (neg, pos) when the negative class is the majority.
pos = int((y_train == 1).sum())
neg = int(len(y_train) - pos)
if pos == 0:
    raise ValueError("y_train contains no positive samples; cannot compute scale_pos_weight")
scale_pos_weight = neg / pos
print("scale_pos_weight:", scale_pos_weight)

# === 5. Define the CatBoost model ===
cat_model = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.05,
    depth=8,
    eval_metric="AUC",
    random_seed=42,
    verbose=200,
    scale_pos_weight=scale_pos_weight
)

# === 6. Training ===
# Early stopping monitors the validation split only; the test set stays unseen.
cat_model.fit(X_fit, y_fit, eval_set=(X_val, y_val), early_stopping_rounds=100)


# === 7. Prediction & evaluation on the untouched test set ===
y_prob = cat_model.predict_proba(X_test)[:, 1]
y_pred = cat_model.predict(X_test)

print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, digits=4))
print("ROC-AUC:", roc_auc_score(y_test, y_prob))

In [None]:
# CatBoostClassifier with threshold tuning
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score

# Probability of class 1 from the fitted CatBoost model
y_prob = cat_model.predict_proba(X_test)[:, 1]

# ROC-AUC is computed from the probabilities, not from thresholded labels, so
# it is the same for every threshold: compute it once outside the loop.
roc_auc = roc_auc_score(y_test, y_prob)

# Candidate decision thresholds
# (original author's note: also try larger values, e.g. 0.9)
thresholds = [0.5, 0.3, 0.2, 0.1]

for thr in thresholds:
    print(f"\n===== Threshold: {thr} =====")
    # Predict class 1 whenever the probability reaches the threshold
    y_pred_thr = (y_prob >= thr).astype(int)

    cm = confusion_matrix(y_test, y_pred_thr)
    print("Confusion Matrix:\n", cm)

    print(classification_report(y_test, y_pred_thr, digits=4))

    print("ROC-AUC:", roc_auc)

In [None]:
# Stacking experiment: EasyEnsembleClassifier + CatBoostClassifier as base
# learners (author's notes: the former targets imbalanced data, the latter is
# stable on imbalanced data), combined by a logistic-regression meta-model.
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from imblearn.ensemble import EasyEnsembleClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# --- Base learners (level 1) ---
easy = EasyEnsembleClassifier(random_state=42, n_estimators=10)

cat_params = {
    "iterations": 500,
    "depth": 6,
    "learning_rate": 0.1,
    # reuse the imbalance ratio computed in the CatBoost cell
    "scale_pos_weight": scale_pos_weight,
    "verbose": 0,
    "random_state": 42,
}
cat = CatBoostClassifier(**cat_params)

# --- Meta-model (level 2) ---
meta = LogisticRegression(class_weight="balanced", max_iter=1000)

# --- Stacking ensemble ---
base_estimators = [("easy", easy), ("cat", cat)]
stack_model = StackingClassifier(
    estimators=base_estimators,
    final_estimator=meta,
    cv=5,
    n_jobs=-1,
    # passthrough: the meta-model also receives the original features
    passthrough=True,
)

# --- Fit on the training split ---
stack_model.fit(X_train, y_train)

# --- Evaluate on the test split ---
y_prob = stack_model.predict_proba(X_test)[:, 1]
y_pred = stack_model.predict(X_test)

stack_cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", stack_cm)
print("\nClassification Report:\n", classification_report(y_test, y_pred, digits=4))
print("ROC-AUC:", roc_auc_score(y_test, y_prob))

# Author's verdict: results got worse with this ensemble.
# DO NOT USE THIS