In [6]:
import numpy as np
import joblib
import pickle
from sklearn.model_selection import StratifiedKFold
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import (
    accuracy_score, 
    classification_report, 
    confusion_matrix
)

In [7]:
def catboost_tuning(x_tr_resample, y_tr_resample, X_test, y_test):
    # Definisikan parameter grid yang lebih spesifik
    catboost_params = {
        'iterations': [300, 500, 750],
        'learning_rate': [0.01, 0.1, 0.2],
        'depth': [4, 6, 8],
        'l2_leaf_reg': [1, 3, 5],
        'random_strength': [0.1, 0.5, 1.0]
    }
    
    # Inisialisasi variabel untuk menyimpan hasil terbaik
    best_score = 0
    best_params = {}
    best_model = None
    
    # Buat Pool untuk training dan testing
    train_pool = Pool(x_tr_resample, y_tr_resample)
    test_pool = Pool(X_test, y_test)
    
    # Lakukan manual grid search dengan kontrol lebih baik
    for iterations in catboost_params['iterations']:
        for learning_rate in catboost_params['learning_rate']:
            for depth in catboost_params['depth']:
                for l2_leaf_reg in catboost_params['l2_leaf_reg']:
                    for random_strength in catboost_params['random_strength']:
                        # Konfigurasi model CatBoost
                        model = CatBoostClassifier(
                            iterations=iterations,
                            learning_rate=learning_rate,
                            depth=depth,
                            l2_leaf_reg=l2_leaf_reg,
                            random_strength=random_strength,
                            task_type='GPU',  # Gunakan GPU
                            devices='0',     # Gunakan GPU pertama
                            verbose=0,
                            random_seed=42
                        )
                        
                        # Latih model
                        model.fit(train_pool)
                        
                        # Prediksi dan hitung akurasi
                        y_pred = model.predict(test_pool)
                        accuracy = accuracy_score(y_test, y_pred)
                        
                        # Update model terbaik jika akurasi lebih baik
                        if accuracy > best_score:
                            best_score = accuracy
                            best_params = {
                                'iterations': iterations,
                                'learning_rate': learning_rate,
                                'depth': depth,
                                'l2_leaf_reg': l2_leaf_reg,
                                'random_strength': random_strength
                            }
                            best_model = model
    
    # Cetak hasil
    print("\nCatBoost Tuning Results:")
    print("Best Hyperparameters:", best_params)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {accuracy * 100:.2f}%")
    
    # Prediksi akhir dengan model terbaik
    y_pred = best_model.predict(test_pool)
    
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    
    return best_model, best_score

In [8]:
def main():
    # Load the data
    data = np.load('../../Data/#1/processed_data.npz')
    x_tr_resample = data['x_tr_resample']
    y_tr_resample = data['y_tr_resample']
    X_test = data['X_test']
    y_test = data['y_test']
    X_train = data['X_train']

    with open('../../Data/#1/power_transformer.pkl', 'rb') as f:
        norm = pickle.load(f)

    # Load normalization transformer
    # norm = joblib.load('../../Data/#1/power_transformer.joblib')

    # Normalize features
    norm_train_feature = norm.fit_transform(X_train)
    norm_test_feature = norm.transform(X_test)

    # Run CatBoost tuning
    best_model, accuracy = catboost_tuning(x_tr_resample, y_tr_resample, norm_test_feature, y_test)
    
    # Optional: Save the best model
    # joblib.dump(best_model, 'catboost_best_model.joblib')

In [9]:
if __name__ == "__main__":
    main()


CatBoost Tuning Results:
Best Hyperparameters: {'iterations': 750, 'learning_rate': 0.2, 'depth': 8, 'l2_leaf_reg': 1, 'random_strength': 0.5}
Accuracy: 98.07%

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1547
           1       0.96      0.96      0.96       422

    accuracy                           0.98      1969
   macro avg       0.98      0.97      0.98      1969
weighted avg       0.98      0.98      0.98      1969


Confusion Matrix:
[[1531   16]
 [  17  405]]


In [None]:
print(f"GPU Available: {model.get_gpu_device_count()} devices")