In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score

In [2]:
x_tr_resample = pd.read_csv('../../Data/clean/X_train_smote.csv')
X_test = pd.read_csv('../../Data/clean/X_test.csv')
y_tr_resample = np.loadtxt("../../Data/clean/y_train_smote.csv", delimiter=",")
y_test = np.loadtxt("../../Data/clean/y_test.csv", delimiter=",")

In [3]:
skf_grid = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

In [4]:
base_params = {
    'task_type': 'GPU',
    'devices': '0:1',  # Gunakan GPU pertama
    'gpu_ram_part': 0.95,  # Gunakan 95% memory GPU yang tersedia
}

In [5]:
catboost_params = {
    'iterations': [500],             # Kurangi iterasi untuk mempercepat proses
    'depth': [6],                    # Gunakan kedalaman pohon tunggal
    'learning_rate': [0.05],         # Gunakan 1 nilai yang moderat
    'l2_leaf_reg': [3],              # Tetapkan 1 nilai regularisasi
    'border_count': [128],           # Nilai optimal untuk GPU
    'bootstrap_type': ['Bernoulli'], # Pertahankan bootstrap Bernoulli
    'subsample': [0.8],              # Satu nilai subsample
    'grow_policy': ['SymmetricTree'],# Pertahankan kebijakan cepat
    'eval_metric': ['AUC'],          # Gunakan AUC sebagai metrik evaluasi
    'od_type': ['Iter'],             # Early stopping
    'od_wait': [30],                 # Kurangi iterasi tanpa peningkatan
}


In [6]:
init_params = base_params.copy()

In [7]:
model = CatBoostClassifier(**init_params)

In [8]:
grid_search = GridSearchCV(
        estimator=model,
        param_grid=catboost_params,
        cv=skf_grid,
        n_jobs=1,  # Gunakan 1 karena kita menggunakan GPU
        verbose=1,
        scoring='f1_weighted',
)

In [9]:
train_pool = Pool(x_tr_resample, y_tr_resample)

In [10]:
grid_search.fit(x_tr_resample, y_tr_resample)

Fitting 10 folds for each of 1 candidates, totalling 10 fits


Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 381ms	remaining: 3m 10s
1:	total: 619ms	remaining: 2m 34s
2:	total: 726ms	remaining: 2m
3:	total: 817ms	remaining: 1m 41s
4:	total: 917ms	remaining: 1m 30s
5:	total: 1.03s	remaining: 1m 24s
6:	total: 1.12s	remaining: 1m 18s
7:	total: 1.21s	remaining: 1m 14s
8:	total: 1.3s	remaining: 1m 11s
9:	total: 1.39s	remaining: 1m 8s
10:	total: 1.52s	remaining: 1m 7s
11:	total: 1.66s	remaining: 1m 7s
12:	total: 1.76s	remaining: 1m 5s
13:	total: 1.92s	remaining: 1m 6s
14:	total: 2.05s	remaining: 1m 6s
15:	total: 2.14s	remaining: 1m 4s
16:	total: 2.23s	remaining: 1m 3s
17:	total: 2.32s	remaining: 1m 2s
18:	total: 2.4s	remaining: 1m
19:	total: 2.48s	remaining: 59.5s
20:	total: 2.55s	remaining: 58.3s
21:	total: 2.64s	remaining: 57.3s
22:	total: 2.72s	remaining: 56.4s
23:	total: 2.79s	remaining: 55.4s
24:	total: 2.9s	remaining: 55s
25:	total: 2.98s	remaining: 54.4s
26:	total: 3.08s	remaining: 53.9s
27:	total: 3.18s	remaining: 53.7s
28:	total: 3.28s	remaining: 53.2s
29:	total: 3.36s	remaining:

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 85ms	remaining: 42.4s
1:	total: 178ms	remaining: 44.3s
2:	total: 276ms	remaining: 45.7s
3:	total: 368ms	remaining: 45.7s
4:	total: 459ms	remaining: 45.4s
5:	total: 554ms	remaining: 45.6s
6:	total: 650ms	remaining: 45.8s
7:	total: 720ms	remaining: 44.3s
8:	total: 816ms	remaining: 44.5s
9:	total: 911ms	remaining: 44.6s
10:	total: 1.01s	remaining: 45s
11:	total: 1.08s	remaining: 44s
12:	total: 1.17s	remaining: 43.9s
13:	total: 1.3s	remaining: 45.3s
14:	total: 1.47s	remaining: 47.5s
15:	total: 1.53s	remaining: 46.5s
16:	total: 1.6s	remaining: 45.6s
17:	total: 1.69s	remaining: 45.3s
18:	total: 1.79s	remaining: 45.2s
19:	total: 1.89s	remaining: 45.3s
20:	total: 1.98s	remaining: 45.2s
21:	total: 2.07s	remaining: 44.9s
22:	total: 2.17s	remaining: 44.9s
23:	total: 2.26s	remaining: 44.9s
24:	total: 2.36s	remaining: 44.8s
25:	total: 2.45s	remaining: 44.7s
26:	total: 2.55s	remaining: 44.6s
27:	total: 2.65s	remaining: 44.7s
28:	total: 2.74s	remaining: 44.5s
29:	total: 2.82s	remaining: 44.

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 89.4ms	remaining: 44.6s
1:	total: 166ms	remaining: 41.3s
2:	total: 258ms	remaining: 42.8s
3:	total: 361ms	remaining: 44.8s
4:	total: 429ms	remaining: 42.5s
5:	total: 491ms	remaining: 40.4s
6:	total: 576ms	remaining: 40.6s
7:	total: 667ms	remaining: 41s
8:	total: 757ms	remaining: 41.3s
9:	total: 875ms	remaining: 42.9s
10:	total: 963ms	remaining: 42.8s
11:	total: 1.03s	remaining: 42s
12:	total: 1.1s	remaining: 41.2s
13:	total: 1.16s	remaining: 40.4s
14:	total: 1.23s	remaining: 39.7s
15:	total: 1.3s	remaining: 39.4s
16:	total: 1.37s	remaining: 39s
17:	total: 1.47s	remaining: 39.3s
18:	total: 1.54s	remaining: 39s
19:	total: 1.61s	remaining: 38.6s
20:	total: 1.68s	remaining: 38.3s
21:	total: 1.75s	remaining: 38s
22:	total: 1.84s	remaining: 38.1s
23:	total: 1.93s	remaining: 38.3s
24:	total: 2.02s	remaining: 38.3s
25:	total: 2.1s	remaining: 38.2s
26:	total: 2.16s	remaining: 37.9s
27:	total: 2.24s	remaining: 37.8s
28:	total: 2.32s	remaining: 37.7s
29:	total: 2.4s	remaining: 37.7s
30:

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 166ms	remaining: 1m 22s
1:	total: 321ms	remaining: 1m 20s
2:	total: 479ms	remaining: 1m 19s
3:	total: 634ms	remaining: 1m 18s
4:	total: 788ms	remaining: 1m 18s
5:	total: 927ms	remaining: 1m 16s
6:	total: 1.06s	remaining: 1m 15s
7:	total: 1.22s	remaining: 1m 15s
8:	total: 1.39s	remaining: 1m 15s
9:	total: 1.5s	remaining: 1m 13s
10:	total: 1.58s	remaining: 1m 10s
11:	total: 1.67s	remaining: 1m 7s
12:	total: 1.76s	remaining: 1m 5s
13:	total: 1.86s	remaining: 1m 4s
14:	total: 1.96s	remaining: 1m 3s
15:	total: 2.06s	remaining: 1m 2s
16:	total: 2.15s	remaining: 1m 1s
17:	total: 2.24s	remaining: 1m
18:	total: 2.35s	remaining: 59.4s
19:	total: 2.45s	remaining: 58.7s
20:	total: 2.54s	remaining: 58.1s
21:	total: 2.84s	remaining: 1m 1s
22:	total: 2.96s	remaining: 1m 1s
23:	total: 3.02s	remaining: 1m
24:	total: 3.08s	remaining: 58.5s
25:	total: 3.14s	remaining: 57.2s
26:	total: 3.21s	remaining: 56.3s
27:	total: 3.3s	remaining: 55.7s
28:	total: 3.4s	remaining: 55.3s
29:	total: 3.5s	remain

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 97ms	remaining: 48.4s
1:	total: 182ms	remaining: 45.3s
2:	total: 276ms	remaining: 45.7s
3:	total: 364ms	remaining: 45.2s
4:	total: 454ms	remaining: 45s
5:	total: 562ms	remaining: 46.3s
6:	total: 668ms	remaining: 47.1s
7:	total: 746ms	remaining: 45.9s
8:	total: 825ms	remaining: 45s
9:	total: 915ms	remaining: 44.8s
10:	total: 1.01s	remaining: 44.8s
11:	total: 1.09s	remaining: 44.3s
12:	total: 1.17s	remaining: 43.8s
13:	total: 1.26s	remaining: 43.8s
14:	total: 1.43s	remaining: 46.2s
15:	total: 1.7s	remaining: 51.3s
16:	total: 1.81s	remaining: 51.3s
17:	total: 1.87s	remaining: 50.1s
18:	total: 1.96s	remaining: 49.6s
19:	total: 2.05s	remaining: 49.1s
20:	total: 2.13s	remaining: 48.7s
21:	total: 2.23s	remaining: 48.4s
22:	total: 2.33s	remaining: 48.3s
23:	total: 2.41s	remaining: 47.8s
24:	total: 2.49s	remaining: 47.3s
25:	total: 2.57s	remaining: 46.9s
26:	total: 2.66s	remaining: 46.7s
27:	total: 2.75s	remaining: 46.5s
28:	total: 2.85s	remaining: 46.2s
29:	total: 2.93s	remaining: 46

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 120ms	remaining: 59.7s
1:	total: 188ms	remaining: 46.9s
2:	total: 280ms	remaining: 46.4s
3:	total: 369ms	remaining: 45.7s
4:	total: 479ms	remaining: 47.4s
5:	total: 589ms	remaining: 48.5s
6:	total: 689ms	remaining: 48.6s
7:	total: 775ms	remaining: 47.7s
8:	total: 901ms	remaining: 49.1s
9:	total: 984ms	remaining: 48.2s
10:	total: 1.05s	remaining: 46.7s
11:	total: 1.14s	remaining: 46.3s
12:	total: 1.23s	remaining: 45.9s
13:	total: 1.31s	remaining: 45.6s
14:	total: 1.4s	remaining: 45.3s
15:	total: 1.49s	remaining: 45.1s
16:	total: 1.58s	remaining: 44.9s
17:	total: 1.68s	remaining: 44.9s
18:	total: 1.78s	remaining: 45s
19:	total: 1.88s	remaining: 45s
20:	total: 1.98s	remaining: 45.1s
21:	total: 2.07s	remaining: 45s
22:	total: 2.17s	remaining: 45s
23:	total: 2.26s	remaining: 44.9s
24:	total: 2.36s	remaining: 44.8s
25:	total: 2.48s	remaining: 45.2s
26:	total: 2.56s	remaining: 44.8s
27:	total: 2.63s	remaining: 44.3s
28:	total: 2.79s	remaining: 45.4s
29:	total: 2.98s	remaining: 46.7s

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 148ms	remaining: 1m 13s
1:	total: 301ms	remaining: 1m 14s
2:	total: 455ms	remaining: 1m 15s
3:	total: 601ms	remaining: 1m 14s
4:	total: 754ms	remaining: 1m 14s
5:	total: 903ms	remaining: 1m 14s
6:	total: 1.04s	remaining: 1m 13s
7:	total: 1.18s	remaining: 1m 12s
8:	total: 1.32s	remaining: 1m 12s
9:	total: 1.47s	remaining: 1m 12s
10:	total: 1.64s	remaining: 1m 12s
11:	total: 1.77s	remaining: 1m 11s
12:	total: 1.86s	remaining: 1m 9s
13:	total: 1.97s	remaining: 1m 8s
14:	total: 2.16s	remaining: 1m 9s
15:	total: 2.29s	remaining: 1m 9s
16:	total: 2.35s	remaining: 1m 6s
17:	total: 2.43s	remaining: 1m 5s
18:	total: 2.51s	remaining: 1m 3s
19:	total: 2.61s	remaining: 1m 2s
20:	total: 2.71s	remaining: 1m 1s
21:	total: 2.8s	remaining: 1m
22:	total: 2.88s	remaining: 59.7s
23:	total: 2.97s	remaining: 58.9s
24:	total: 3.06s	remaining: 58.1s
25:	total: 3.13s	remaining: 57.1s
26:	total: 3.23s	remaining: 56.5s
27:	total: 3.31s	remaining: 55.7s
28:	total: 3.39s	remaining: 55.1s
29:	total: 3.47s

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 90ms	remaining: 44.9s
1:	total: 165ms	remaining: 41s
2:	total: 244ms	remaining: 40.4s
3:	total: 327ms	remaining: 40.5s
4:	total: 427ms	remaining: 42.2s
5:	total: 513ms	remaining: 42.2s
6:	total: 584ms	remaining: 41.1s
7:	total: 674ms	remaining: 41.4s
8:	total: 779ms	remaining: 42.5s
9:	total: 886ms	remaining: 43.4s
10:	total: 975ms	remaining: 43.3s
11:	total: 1.07s	remaining: 43.6s
12:	total: 1.17s	remaining: 43.7s
13:	total: 1.26s	remaining: 43.7s
14:	total: 1.33s	remaining: 43s
15:	total: 1.4s	remaining: 42.5s
16:	total: 1.47s	remaining: 41.9s
17:	total: 1.55s	remaining: 41.6s
18:	total: 1.63s	remaining: 41.3s
19:	total: 1.72s	remaining: 41.4s
20:	total: 2s	remaining: 45.6s
21:	total: 2.16s	remaining: 47s
22:	total: 2.24s	remaining: 46.5s
23:	total: 2.31s	remaining: 45.8s
24:	total: 2.38s	remaining: 45.1s
25:	total: 2.48s	remaining: 45.2s
26:	total: 2.58s	remaining: 45.2s
27:	total: 2.68s	remaining: 45.2s
28:	total: 2.78s	remaining: 45.1s
29:	total: 2.88s	remaining: 45.2s
3

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 103ms	remaining: 51.3s
1:	total: 210ms	remaining: 52.3s
2:	total: 327ms	remaining: 54.1s
3:	total: 419ms	remaining: 52s
4:	total: 518ms	remaining: 51.3s
5:	total: 632ms	remaining: 52.1s
6:	total: 748ms	remaining: 52.7s
7:	total: 847ms	remaining: 52.1s
8:	total: 947ms	remaining: 51.7s
9:	total: 1.06s	remaining: 51.8s
10:	total: 1.16s	remaining: 51.5s
11:	total: 1.26s	remaining: 51.4s
12:	total: 1.38s	remaining: 51.8s
13:	total: 1.51s	remaining: 52.5s
14:	total: 1.63s	remaining: 52.6s
15:	total: 1.73s	remaining: 52.5s
16:	total: 1.84s	remaining: 52.4s
17:	total: 1.95s	remaining: 52.3s
18:	total: 2.05s	remaining: 52s
19:	total: 2.15s	remaining: 51.6s
20:	total: 2.26s	remaining: 51.6s
21:	total: 2.34s	remaining: 50.9s
22:	total: 2.41s	remaining: 50s
23:	total: 2.48s	remaining: 49.1s
24:	total: 2.56s	remaining: 48.6s
25:	total: 2.67s	remaining: 48.6s
26:	total: 2.77s	remaining: 48.4s
27:	total: 2.84s	remaining: 47.9s
28:	total: 2.91s	remaining: 47.3s
29:	total: 3s	remaining: 47.1s

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 99.4ms	remaining: 49.6s
1:	total: 201ms	remaining: 50.2s
2:	total: 299ms	remaining: 49.5s
3:	total: 420ms	remaining: 52.1s
4:	total: 503ms	remaining: 49.8s
5:	total: 579ms	remaining: 47.6s
6:	total: 674ms	remaining: 47.4s
7:	total: 791ms	remaining: 48.6s
8:	total: 890ms	remaining: 48.6s
9:	total: 989ms	remaining: 48.5s
10:	total: 1.13s	remaining: 50.4s
11:	total: 1.3s	remaining: 53s
12:	total: 1.38s	remaining: 51.9s
13:	total: 1.45s	remaining: 50.2s
14:	total: 1.52s	remaining: 49.1s
15:	total: 1.61s	remaining: 48.6s
16:	total: 1.71s	remaining: 48.6s
17:	total: 1.81s	remaining: 48.5s
18:	total: 1.9s	remaining: 48.2s
19:	total: 2.01s	remaining: 48.2s
20:	total: 2.1s	remaining: 47.9s
21:	total: 2.19s	remaining: 47.7s
22:	total: 2.3s	remaining: 47.7s
23:	total: 2.41s	remaining: 47.8s
24:	total: 2.5s	remaining: 47.6s
25:	total: 2.6s	remaining: 47.4s
26:	total: 2.68s	remaining: 46.9s
27:	total: 2.77s	remaining: 46.7s
28:	total: 2.86s	remaining: 46.5s
29:	total: 2.98s	remaining: 46.

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 104ms	remaining: 52s
1:	total: 220ms	remaining: 54.8s
2:	total: 316ms	remaining: 52.4s
3:	total: 431ms	remaining: 53.5s
4:	total: 524ms	remaining: 51.9s
5:	total: 599ms	remaining: 49.3s
6:	total: 696ms	remaining: 49s
7:	total: 786ms	remaining: 48.3s
8:	total: 882ms	remaining: 48.1s
9:	total: 988ms	remaining: 48.4s
10:	total: 1.07s	remaining: 47.8s
11:	total: 1.17s	remaining: 47.7s
12:	total: 1.27s	remaining: 47.7s
13:	total: 1.37s	remaining: 47.5s
14:	total: 1.47s	remaining: 47.6s
15:	total: 1.57s	remaining: 47.6s
16:	total: 1.68s	remaining: 47.8s
17:	total: 1.78s	remaining: 47.8s
18:	total: 1.89s	remaining: 47.9s
19:	total: 2.01s	remaining: 48.3s
20:	total: 2.12s	remaining: 48.4s
21:	total: 2.24s	remaining: 48.7s
22:	total: 2.34s	remaining: 48.6s
23:	total: 2.44s	remaining: 48.5s
24:	total: 2.55s	remaining: 48.4s
25:	total: 2.65s	remaining: 48.4s
26:	total: 2.75s	remaining: 48.1s
27:	total: 2.85s	remaining: 48.1s
28:	total: 2.96s	remaining: 48.1s
29:	total: 3.07s	remaining: 

In [11]:
# Get best parameters
best_params = grid_search.best_params_

In [12]:


# Create new model with best parameters
best_model = CatBoostClassifier(**best_params, silent=True, random_state=42)



In [13]:
skf_eval = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

accuracies = []
recalls = []
precisions = []
f1_scores = []
roc_aucs = []

for fold, (train_idx, val_idx) in enumerate(skf_eval.split(x_tr_resample, y_tr_resample)):
    # Split data
    X_train_fold = x_tr_resample.iloc[train_idx]
    y_train_fold = y_tr_resample[train_idx]
    X_val_fold = x_tr_resample.iloc[val_idx]
    y_val_fold = y_tr_resample[val_idx]
    
    # Train model
    best_model.fit(X_train_fold, y_train_fold)
    
    # Make predictions
    y_pred_fold = best_model.predict(X_val_fold)
    y_pred_proba_fold = best_model.predict_proba(X_val_fold)[:, 1]
    
    # Calculate metrics
    accuracies.append(accuracy_score(y_val_fold, y_pred_fold))
    recalls.append(recall_score(y_val_fold, y_pred_fold, average='weighted'))
    precisions.append(precision_score(y_val_fold, y_pred_fold, average='weighted'))
    f1_scores.append(f1_score(y_val_fold, y_pred_fold, average='weighted'))
    roc_aucs.append(roc_auc_score(y_val_fold, y_pred_proba_fold))

In [14]:
best_model.fit(x_tr_resample, y_tr_resample)
y_pred_test = best_model.predict(X_test)

y_pred_test_proba = best_model.predict_proba(X_test)[:, 1]

y_train_pred = best_model.predict(x_tr_resample)

In [15]:
print("CatBoost Results:")
print("-" * 50)
print("Best Parameters:", best_params)
print("\nCross-validation Results (10-fold):")
print(f"Accuracy    : {np.mean(accuracies)*100:.2f}% (+/- {np.std(accuracies)*100:.2f}%)")
print(f"Recall      : {np.mean(recalls)*100:.2f}% (+/- {np.std(recalls)*100:.2f}%)")
print(f"Precision   : {np.mean(precisions)*100:.2f}% (+/- {np.std(precisions)*100:.2f}%)")
print(f"F1-Score    : {np.mean(f1_scores)*100:.2f}% (+/- {np.std(f1_scores)*100:.2f}%)")

print("\nTrain Set Results:")
print(f"Accuracy    : {accuracy_score(y_tr_resample, y_train_pred)*100:.2f}%")
print(f"Recall      : {recall_score(y_tr_resample, y_train_pred, average='weighted')*100:.2f}%")
print(f"Precision   : {precision_score(y_tr_resample, y_train_pred, average='weighted')*100:.2f}%")
print(f"F1-Score    : {f1_score(y_tr_resample, y_train_pred, average='weighted')*100:.2f}%")

print("\nTest Set Results:")
print(f"Accuracy    : {accuracy_score(y_test, y_pred_test)*100:.2f}%")
print(f"Recall      : {recall_score(y_test, y_pred_test, average='weighted')*100:.2f}%")
print(f"Precision   : {precision_score(y_test, y_pred_test, average='weighted')*100:.2f}%")
print(f"F1-Score    : {f1_score(y_test, y_pred_test, average='weighted')*100:.2f}%")
print(f"ROC AUC     : {roc_auc_score(y_test, y_pred_test_proba)*100:.2f}%")
print("-" * 50)

CatBoost Results:
--------------------------------------------------
Best Parameters: {'bootstrap_type': 'Bernoulli', 'border_count': 128, 'depth': 6, 'eval_metric': 'AUC', 'grow_policy': 'SymmetricTree', 'iterations': 500, 'l2_leaf_reg': 3, 'learning_rate': 0.05, 'od_type': 'Iter', 'od_wait': 30, 'subsample': 0.8}

Cross-validation Results (10-fold):
Accuracy    : 99.43% (+/- 0.14%)
Recall      : 99.43% (+/- 0.14%)
Precision   : 99.43% (+/- 0.14%)
F1-Score    : 99.43% (+/- 0.14%)

Train Set Results:
Accuracy    : 99.93%
Recall      : 99.93%
Precision   : 99.93%
F1-Score    : 99.93%

Test Set Results:
Accuracy    : 99.34%
Recall      : 99.34%
Precision   : 99.34%
F1-Score    : 99.34%
ROC AUC     : 99.93%
--------------------------------------------------
