In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

In [2]:
x_tr_resample = pd.read_csv('../../Data/clean/X_train_smote.csv')
X_test = pd.read_csv('../../Data/clean/X_test.csv')
y_tr_resample = np.loadtxt("../../Data/clean/y_train_smote.csv", delimiter=",")
y_test = np.loadtxt("../../Data/clean/y_test.csv", delimiter=",")

In [4]:
skf_grid = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

In [5]:
base_params = {
    'task_type': 'GPU',
    'devices': '0:1',  # Gunakan GPU pertama
    'gpu_ram_part': 0.95,  # Gunakan 95% memory GPU yang tersedia
}

In [6]:
catboost_params = {
    'iterations': [500],             # Kurangi iterasi untuk mempercepat proses
    'depth': [6],                    # Gunakan kedalaman pohon tunggal
    'learning_rate': [0.05],         # Gunakan 1 nilai yang moderat
    'l2_leaf_reg': [3],              # Tetapkan 1 nilai regularisasi
    'border_count': [128],           # Nilai optimal untuk GPU
    'bootstrap_type': ['Bernoulli'], # Pertahankan bootstrap Bernoulli
    'subsample': [0.8],              # Satu nilai subsample
    'grow_policy': ['SymmetricTree'],# Pertahankan kebijakan cepat
    'eval_metric': ['AUC'],          # Gunakan AUC sebagai metrik evaluasi
    'od_type': ['Iter'],             # Early stopping
    'od_wait': [30],                 # Kurangi iterasi tanpa peningkatan
}


In [7]:
init_params = base_params.copy()

In [8]:
model = CatBoostClassifier(**init_params)

In [9]:
grid_search = GridSearchCV(
        estimator=model,
        param_grid=catboost_params,
        cv=skf_grid,
        n_jobs=1,  # Gunakan 1 karena kita menggunakan GPU
        verbose=1,
        scoring='f1_weighted',
)

In [10]:
train_pool = Pool(x_tr_resample, y_tr_resample)

In [11]:
grid_search.fit(x_tr_resample, y_tr_resample)

Fitting 10 folds for each of 1 candidates, totalling 10 fits


Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 77.7ms	remaining: 38.8s
1:	total: 149ms	remaining: 37s
2:	total: 220ms	remaining: 36.5s
3:	total: 291ms	remaining: 36.1s
4:	total: 370ms	remaining: 36.6s
5:	total: 450ms	remaining: 37s
6:	total: 527ms	remaining: 37.1s
7:	total: 597ms	remaining: 36.7s
8:	total: 659ms	remaining: 36s
9:	total: 724ms	remaining: 35.5s
10:	total: 797ms	remaining: 35.4s
11:	total: 866ms	remaining: 35.2s
12:	total: 960ms	remaining: 36s
13:	total: 1.04s	remaining: 36.1s
14:	total: 1.11s	remaining: 35.9s
15:	total: 1.18s	remaining: 35.8s
16:	total: 1.24s	remaining: 35.3s
17:	total: 1.33s	remaining: 35.6s
18:	total: 1.42s	remaining: 35.9s
19:	total: 1.5s	remaining: 36s
20:	total: 1.6s	remaining: 36.4s
21:	total: 1.69s	remaining: 36.7s
22:	total: 1.75s	remaining: 36.4s
23:	total: 1.83s	remaining: 36.2s
24:	total: 1.89s	remaining: 35.9s
25:	total: 1.97s	remaining: 35.9s
26:	total: 2.06s	remaining: 36.2s
27:	total: 2.14s	remaining: 36.1s
28:	total: 2.21s	remaining: 35.9s
29:	total: 2.31s	remaining: 36.2s
3

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 99ms	remaining: 49.4s
1:	total: 195ms	remaining: 48.7s
2:	total: 274ms	remaining: 45.4s
3:	total: 340ms	remaining: 42.1s
4:	total: 418ms	remaining: 41.4s
5:	total: 511ms	remaining: 42.1s
6:	total: 580ms	remaining: 40.9s
7:	total: 641ms	remaining: 39.4s
8:	total: 686ms	remaining: 37.4s
9:	total: 726ms	remaining: 35.6s
10:	total: 787ms	remaining: 35s
11:	total: 867ms	remaining: 35.3s
12:	total: 948ms	remaining: 35.5s
13:	total: 1.02s	remaining: 35.4s
14:	total: 1.08s	remaining: 35.1s
15:	total: 1.16s	remaining: 35.1s
16:	total: 1.24s	remaining: 35.1s
17:	total: 1.33s	remaining: 35.6s
18:	total: 1.41s	remaining: 35.7s
19:	total: 1.49s	remaining: 35.9s
20:	total: 1.58s	remaining: 36s
21:	total: 1.67s	remaining: 36.4s
22:	total: 1.75s	remaining: 36.3s
23:	total: 1.81s	remaining: 36s
24:	total: 1.88s	remaining: 35.8s
25:	total: 1.96s	remaining: 35.6s
26:	total: 2.04s	remaining: 35.7s
27:	total: 2.11s	remaining: 35.6s
28:	total: 2.19s	remaining: 35.6s
29:	total: 2.28s	remaining: 35.

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 77.4ms	remaining: 38.6s
1:	total: 156ms	remaining: 38.9s
2:	total: 230ms	remaining: 38.1s
3:	total: 300ms	remaining: 37.1s
4:	total: 368ms	remaining: 36.4s
5:	total: 447ms	remaining: 36.8s
6:	total: 511ms	remaining: 36s
7:	total: 607ms	remaining: 37.3s
8:	total: 679ms	remaining: 37s
9:	total: 739ms	remaining: 36.2s
10:	total: 797ms	remaining: 35.4s
11:	total: 847ms	remaining: 34.5s
12:	total: 918ms	remaining: 34.4s
13:	total: 998ms	remaining: 34.6s
14:	total: 1.06s	remaining: 34.4s
15:	total: 1.13s	remaining: 34.3s
16:	total: 1.21s	remaining: 34.4s
17:	total: 1.3s	remaining: 34.8s
18:	total: 1.39s	remaining: 35.1s
19:	total: 1.48s	remaining: 35.4s
20:	total: 1.54s	remaining: 35.3s
21:	total: 1.65s	remaining: 35.8s
22:	total: 1.75s	remaining: 36.4s
23:	total: 1.84s	remaining: 36.5s
24:	total: 1.92s	remaining: 36.6s
25:	total: 2.01s	remaining: 36.6s
26:	total: 2.09s	remaining: 36.6s
27:	total: 2.16s	remaining: 36.5s
28:	total: 2.24s	remaining: 36.5s
29:	total: 2.33s	remaining: 

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 72.1ms	remaining: 36s
1:	total: 127ms	remaining: 31.6s
2:	total: 193ms	remaining: 31.9s
3:	total: 267ms	remaining: 33.1s
4:	total: 345ms	remaining: 34.1s
5:	total: 395ms	remaining: 32.5s
6:	total: 449ms	remaining: 31.6s
7:	total: 532ms	remaining: 32.7s
8:	total: 617ms	remaining: 33.7s
9:	total: 687ms	remaining: 33.7s
10:	total: 772ms	remaining: 34.3s
11:	total: 839ms	remaining: 34.1s
12:	total: 911ms	remaining: 34.1s
13:	total: 990ms	remaining: 34.4s
14:	total: 1.06s	remaining: 34.4s
15:	total: 1.13s	remaining: 34.2s
16:	total: 1.2s	remaining: 34.1s
17:	total: 1.28s	remaining: 34.4s
18:	total: 1.37s	remaining: 34.7s
19:	total: 1.44s	remaining: 34.7s
20:	total: 1.53s	remaining: 34.8s
21:	total: 1.59s	remaining: 34.6s
22:	total: 1.68s	remaining: 34.8s
23:	total: 1.75s	remaining: 34.6s
24:	total: 1.83s	remaining: 34.8s
25:	total: 1.89s	remaining: 34.5s
26:	total: 1.97s	remaining: 34.6s
27:	total: 2.05s	remaining: 34.5s
28:	total: 2.12s	remaining: 34.5s
29:	total: 2.21s	remaining

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 74.3ms	remaining: 37.1s
1:	total: 165ms	remaining: 41.1s
2:	total: 242ms	remaining: 40.1s
3:	total: 315ms	remaining: 39s
4:	total: 385ms	remaining: 38.1s
5:	total: 460ms	remaining: 37.9s
6:	total: 527ms	remaining: 37.1s
7:	total: 588ms	remaining: 36.2s
8:	total: 651ms	remaining: 35.5s
9:	total: 727ms	remaining: 35.6s
10:	total: 791ms	remaining: 35.2s
11:	total: 872ms	remaining: 35.4s
12:	total: 950ms	remaining: 35.6s
13:	total: 1.04s	remaining: 36s
14:	total: 1.1s	remaining: 35.7s
15:	total: 1.17s	remaining: 35.3s
16:	total: 1.23s	remaining: 35s
17:	total: 1.29s	remaining: 34.6s
18:	total: 1.36s	remaining: 34.5s
19:	total: 1.44s	remaining: 34.5s
20:	total: 1.52s	remaining: 34.6s
21:	total: 1.59s	remaining: 34.5s
22:	total: 1.63s	remaining: 33.9s
23:	total: 1.71s	remaining: 33.9s
24:	total: 1.77s	remaining: 33.7s
25:	total: 1.86s	remaining: 33.9s
26:	total: 1.94s	remaining: 33.9s
27:	total: 2s	remaining: 33.7s
28:	total: 2.06s	remaining: 33.5s
29:	total: 2.13s	remaining: 33.4s

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 89ms	remaining: 44.4s
1:	total: 168ms	remaining: 41.8s
2:	total: 221ms	remaining: 36.6s
3:	total: 283ms	remaining: 35.1s
4:	total: 341ms	remaining: 33.8s
5:	total: 417ms	remaining: 34.3s
6:	total: 482ms	remaining: 33.9s
7:	total: 530ms	remaining: 32.6s
8:	total: 575ms	remaining: 31.4s
9:	total: 644ms	remaining: 31.5s
10:	total: 731ms	remaining: 32.5s
11:	total: 809ms	remaining: 32.9s
12:	total: 884ms	remaining: 33.1s
13:	total: 960ms	remaining: 33.3s
14:	total: 1.02s	remaining: 33.1s
15:	total: 1.09s	remaining: 33s
16:	total: 1.17s	remaining: 33.4s
17:	total: 1.26s	remaining: 33.7s
18:	total: 1.34s	remaining: 34s
19:	total: 1.42s	remaining: 34.1s
20:	total: 1.5s	remaining: 34.2s
21:	total: 1.57s	remaining: 34.1s
22:	total: 1.64s	remaining: 34.1s
23:	total: 1.73s	remaining: 34.3s
24:	total: 1.8s	remaining: 34.2s
25:	total: 1.88s	remaining: 34.3s
26:	total: 1.96s	remaining: 34.4s
27:	total: 2.03s	remaining: 34.2s
28:	total: 2.1s	remaining: 34.1s
29:	total: 2.18s	remaining: 34.1

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 83.7ms	remaining: 41.8s
1:	total: 163ms	remaining: 40.6s
2:	total: 229ms	remaining: 38s
3:	total: 309ms	remaining: 38.3s
4:	total: 381ms	remaining: 37.7s
5:	total: 450ms	remaining: 37s
6:	total: 531ms	remaining: 37.4s
7:	total: 613ms	remaining: 37.7s
8:	total: 687ms	remaining: 37.5s
9:	total: 770ms	remaining: 37.7s
10:	total: 844ms	remaining: 37.5s
11:	total: 891ms	remaining: 36.2s
12:	total: 934ms	remaining: 35s
13:	total: 978ms	remaining: 34s
14:	total: 1.05s	remaining: 34s
15:	total: 1.14s	remaining: 34.4s
16:	total: 1.22s	remaining: 34.7s
17:	total: 1.31s	remaining: 35s
18:	total: 1.39s	remaining: 35.1s
19:	total: 1.47s	remaining: 35.3s
20:	total: 1.56s	remaining: 35.6s
21:	total: 1.64s	remaining: 35.6s
22:	total: 1.72s	remaining: 35.7s
23:	total: 1.81s	remaining: 35.9s
24:	total: 1.89s	remaining: 35.8s
25:	total: 1.98s	remaining: 36s
26:	total: 2.06s	remaining: 36.1s
27:	total: 2.14s	remaining: 36s
28:	total: 2.22s	remaining: 36s
29:	total: 2.3s	remaining: 36s
30:	total:

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 78.2ms	remaining: 39s
1:	total: 139ms	remaining: 34.7s
2:	total: 208ms	remaining: 34.5s
3:	total: 281ms	remaining: 34.9s
4:	total: 354ms	remaining: 35s
5:	total: 426ms	remaining: 35.1s
6:	total: 510ms	remaining: 35.9s
7:	total: 571ms	remaining: 35.1s
8:	total: 627ms	remaining: 34.2s
9:	total: 683ms	remaining: 33.5s
10:	total: 766ms	remaining: 34s
11:	total: 843ms	remaining: 34.3s
12:	total: 920ms	remaining: 34.5s
13:	total: 999ms	remaining: 34.7s
14:	total: 1.08s	remaining: 35s
15:	total: 1.16s	remaining: 35.2s
16:	total: 1.23s	remaining: 35s
17:	total: 1.34s	remaining: 35.9s
18:	total: 1.42s	remaining: 36s
19:	total: 1.52s	remaining: 36.5s
20:	total: 1.59s	remaining: 36.4s
21:	total: 1.67s	remaining: 36.3s
22:	total: 1.74s	remaining: 36.1s
23:	total: 1.82s	remaining: 36s
24:	total: 1.9s	remaining: 36.1s
25:	total: 1.99s	remaining: 36.4s
26:	total: 2.07s	remaining: 36.2s
27:	total: 2.15s	remaining: 36.2s
28:	total: 2.22s	remaining: 36.1s
29:	total: 2.3s	remaining: 36s
30:	tot

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 74.5ms	remaining: 37.2s
1:	total: 159ms	remaining: 39.5s
2:	total: 221ms	remaining: 36.6s
3:	total: 276ms	remaining: 34.3s
4:	total: 343ms	remaining: 33.9s
5:	total: 421ms	remaining: 34.6s
6:	total: 504ms	remaining: 35.5s
7:	total: 581ms	remaining: 35.7s
8:	total: 662ms	remaining: 36.1s
9:	total: 718ms	remaining: 35.2s
10:	total: 780ms	remaining: 34.7s
11:	total: 842ms	remaining: 34.2s
12:	total: 903ms	remaining: 33.8s
13:	total: 988ms	remaining: 34.3s
14:	total: 1.06s	remaining: 34.3s
15:	total: 1.15s	remaining: 34.9s
16:	total: 1.23s	remaining: 34.8s
17:	total: 1.3s	remaining: 34.8s
18:	total: 1.37s	remaining: 34.6s
19:	total: 1.45s	remaining: 34.7s
20:	total: 1.53s	remaining: 35s
21:	total: 1.61s	remaining: 34.9s
22:	total: 1.69s	remaining: 35s
23:	total: 1.75s	remaining: 34.8s
24:	total: 1.83s	remaining: 34.7s
25:	total: 1.9s	remaining: 34.6s
26:	total: 1.98s	remaining: 34.6s
27:	total: 2.04s	remaining: 34.5s
28:	total: 2.12s	remaining: 34.4s
29:	total: 2.18s	remaining: 3

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 83.5ms	remaining: 41.7s
1:	total: 152ms	remaining: 37.9s
2:	total: 217ms	remaining: 36s
3:	total: 301ms	remaining: 37.3s
4:	total: 389ms	remaining: 38.5s
5:	total: 466ms	remaining: 38.3s
6:	total: 555ms	remaining: 39.1s
7:	total: 621ms	remaining: 38.2s
8:	total: 681ms	remaining: 37.1s
9:	total: 744ms	remaining: 36.5s
10:	total: 812ms	remaining: 36.1s
11:	total: 886ms	remaining: 36s
12:	total: 959ms	remaining: 35.9s
13:	total: 1.03s	remaining: 35.7s
14:	total: 1.11s	remaining: 35.9s
15:	total: 1.19s	remaining: 36s
16:	total: 1.26s	remaining: 35.8s
17:	total: 1.37s	remaining: 36.7s
18:	total: 1.43s	remaining: 36.3s
19:	total: 1.5s	remaining: 36s
20:	total: 1.57s	remaining: 35.8s
21:	total: 1.64s	remaining: 35.7s
22:	total: 1.71s	remaining: 35.4s
23:	total: 1.79s	remaining: 35.5s
24:	total: 1.86s	remaining: 35.4s
25:	total: 1.94s	remaining: 35.4s
26:	total: 2.01s	remaining: 35.2s
27:	total: 2.1s	remaining: 35.4s
28:	total: 2.17s	remaining: 35.2s
29:	total: 2.23s	remaining: 35s
3

Default metric period is 5 because AUC is/are not implemented for GPU


0:	total: 83.5ms	remaining: 41.7s
1:	total: 154ms	remaining: 38.5s
2:	total: 232ms	remaining: 38.4s
3:	total: 305ms	remaining: 37.9s
4:	total: 377ms	remaining: 37.4s
5:	total: 448ms	remaining: 36.9s
6:	total: 516ms	remaining: 36.3s
7:	total: 598ms	remaining: 36.8s
8:	total: 678ms	remaining: 37s
9:	total: 740ms	remaining: 36.3s
10:	total: 807ms	remaining: 35.9s
11:	total: 860ms	remaining: 35s
12:	total: 931ms	remaining: 34.9s
13:	total: 1s	remaining: 34.9s
14:	total: 1.07s	remaining: 34.8s
15:	total: 1.15s	remaining: 34.9s
16:	total: 1.22s	remaining: 34.8s
17:	total: 1.29s	remaining: 34.6s
18:	total: 1.38s	remaining: 34.9s
19:	total: 1.45s	remaining: 34.8s
20:	total: 1.52s	remaining: 34.7s
21:	total: 1.59s	remaining: 34.6s
22:	total: 1.66s	remaining: 34.5s
23:	total: 1.73s	remaining: 34.3s
24:	total: 1.8s	remaining: 34.2s
25:	total: 1.87s	remaining: 34.1s
26:	total: 1.94s	remaining: 33.9s
27:	total: 2s	remaining: 33.8s
28:	total: 2.09s	remaining: 33.9s
29:	total: 2.15s	remaining: 33.7s


In [None]:
# Get best parameters
best_params = grid_search.best_params_

In [12]:


# Create new model with best parameters
best_model = CatBoostClassifier(**best_params, silent=True, random_state=42)



In [14]:
skf_eval = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

accuracies = []
recalls = []
precisions = []
f1_scores = []

for fold, (train_idx, val_idx) in enumerate(skf_eval.split(x_tr_resample, y_tr_resample)):
    # Split data
    X_train_fold = x_tr_resample.iloc[train_idx]
    y_train_fold = y_tr_resample[train_idx]
    X_val_fold = x_tr_resample.iloc[val_idx]
    y_val_fold = y_tr_resample[val_idx]
    
    # Train model
    best_model.fit(X_train_fold, y_train_fold)
    
    # Make predictions
    y_pred_fold = best_model.predict(X_val_fold)
    
    # Calculate metrics
    accuracies.append(accuracy_score(y_val_fold, y_pred_fold))
    recalls.append(recall_score(y_val_fold, y_pred_fold, average='weighted'))
    precisions.append(precision_score(y_val_fold, y_pred_fold, average='weighted'))
    f1_scores.append(f1_score(y_val_fold, y_pred_fold, average='weighted'))

In [15]:
best_model.fit(x_tr_resample, y_tr_resample)
y_pred_test = best_model.predict(X_test)

In [16]:
print("CatBoost Results:")
print("-" * 50)
print("Best Parameters:", best_params)
print("\nCross-validation Results (10-fold):")
print(f"Accuracy    : {np.mean(accuracies)*100:.2f}% (+/- {np.std(accuracies)*100:.2f}%)")
print(f"Recall      : {np.mean(recalls)*100:.2f}% (+/- {np.std(recalls)*100:.2f}%)")
print(f"Precision   : {np.mean(precisions)*100:.2f}% (+/- {np.std(precisions)*100:.2f}%)")
print(f"F1-Score    : {np.mean(f1_scores)*100:.2f}% (+/- {np.std(f1_scores)*100:.2f}%)")

print("\nTest Set Results:")
print(f"Accuracy    : {accuracy_score(y_test, y_pred_test)*100:.2f}%")
print(f"Recall      : {recall_score(y_test, y_pred_test, average='weighted')*100:.2f}%")
print(f"Precision   : {precision_score(y_test, y_pred_test, average='weighted')*100:.2f}%")
print(f"F1-Score    : {f1_score(y_test, y_pred_test, average='weighted')*100:.2f}%")
print("-" * 50)

CatBoost Results:
--------------------------------------------------
Best Parameters: {'bootstrap_type': 'Bernoulli', 'border_count': 128, 'depth': 6, 'eval_metric': 'AUC', 'grow_policy': 'SymmetricTree', 'iterations': 500, 'l2_leaf_reg': 3, 'learning_rate': 0.05, 'od_type': 'Iter', 'od_wait': 30, 'subsample': 0.8}

Cross-validation Results (10-fold):
Accuracy    : 99.43% (+/- 0.14%)
Recall      : 99.43% (+/- 0.14%)
Precision   : 99.43% (+/- 0.14%)
F1-Score    : 99.43% (+/- 0.14%)

Test Set Results:
Accuracy    : 99.34%
Recall      : 99.34%
Precision   : 99.34%
F1-Score    : 99.34%
--------------------------------------------------
