In [19]:
import pandas as pd
import numpy as np
import joblib
import os

from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler, PowerTransformer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from lightgbm import LGBMClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from catboost import CatBoostClassifier

print("✅ Libraries imported successfully!")

✅ Libraries imported successfully!


In [20]:
x_tr_resample = pd.read_csv('../Data/X_train_smote.csv')
y_tr_resample = np.loadtxt("../Data/y_train_smote.csv", delimiter=",")

print(f"✅ Data loaded! X_train shape: {x_tr_resample.shape}, y_train shape: {y_tr_resample.shape}")


✅ Data loaded! X_train shape: (12240, 7), y_train shape: (12240,)


In [21]:
minmax = MinMaxScaler()
x_minmax_scaled = minmax.fit_transform(x_tr_resample)
joblib.dump(minmax, "../Models/tuning/minmax.pkl")

# Transformasi PowerTransformer
pt = PowerTransformer()
x_pt_scaled = pt.fit_transform(x_tr_resample)
joblib.dump(pt, "../Models/tuning/powertransformer.pkl")

print("✅ Data scaled & scalers saved!")

✅ Data scaled & scalers saved!


In [22]:
param_grids = {
    "LGBM": {
        'n_estimators': [100, 300],
        'learning_rate': [0.3],  
        'num_leaves': [120],  
        'max_depth': [3],  
        'feature_fraction': [0.84],  
        'bagging_fraction': [0.75],  
        'min_data_in_leaf': [80],  
        'lambda_l1': [0.2],  
        'lambda_l2': [0.56]  
    },
    "ETC": {
        'n_estimators': [400],
        'max_depth': [50],
        'min_samples_split': [2],
        'min_samples_leaf': [2],
        'max_features': [None],
        'bootstrap': [True], 
        'criterion': ['gini']
    },
    "SVM": {
        'C': [0.01, 0.1, 1, 10, 100, 1000],
        'gamma': [1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100],
        'kernel': ['rbf']
    },
    "GNB": {
        'var_smoothing': np.concatenate([
            np.logspace(-15, -12, 10),  
            np.logspace(-12, -9, 15),   
            np.logspace(-9, -6, 15), 
            np.logspace(-6, -3, 10)    
        ]),
        'priors': [
            None,  
            [0.05, 0.95], [0.95, 0.05],  
            [0.1, 0.9], [0.15, 0.85],
            [0.3, 0.7], [0.35, 0.65], 
            [0.4, 0.6], [0.45, 0.55],  
            [0.5, 0.5], 
            [0.55, 0.45], [0.6, 0.4],
            [0.65, 0.35], [0.7, 0.3], 
            [0.75, 0.25], [0.8, 0.2], [0.85, 0.15],
            [0.9, 0.1], [0.95, 0.05]
        ],
    },   
    "CATBOOST": {
        'iterations': [100, 300],
        'depth': [8, 10],
        'learning_rate': [0.05, 0.1]   
    }
}
print("✅ Hyperparameter grid defined!")

✅ Hyperparameter grid defined!


In [23]:
models = {
    "LGBM": LGBMClassifier(boosting_type='gbdt', objective='binary', metric='accuracy', verbose=-1, random_state=42),
    "ETC": ExtraTreesClassifier(random_state=42),
    "SVM": SVC(probability=True, random_state=42),
    "GNB": GaussianNB(),
    "CATBOOST": CatBoostClassifier(verbose=0, random_state=42, devices='0:1')
}


In [24]:
model_dir = "../Models/Tuning"
os.makedirs(model_dir, exist_ok=True)

print("✅ Models initialized!")

✅ Models initialized!


In [25]:
results = []
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

In [26]:
for name, model in models.items():
    print(f"\n🔍 Tuning {name} with GridSearchCV...", flush=True)

    # Select dataset
    if name == "SVM":
        X_train = x_minmax_scaled
    elif name == "GNB":
        X_train = x_pt_scaled
    else:
        X_train = x_tr_resample

    # Grid Search with verbose
    grid_search = GridSearchCV(model, param_grids[name], cv=cv, scoring='accuracy', n_jobs=4, verbose=3)
    grid_search.fit(X_train, y_tr_resample)

    # Best Model
    best_model = grid_search.best_estimator_
    best_params = grid_search.best_params_

    print(f"✅ Best Params for {name}: {best_params}")

    # Evaluate
    y_pred = best_model.predict(X_train)
    y_pred_proba = best_model.predict_proba(X_train)[:, 1] if hasattr(best_model, "predict_proba") else np.zeros_like(y_pred)

    # Compute Metrics
    accuracy = accuracy_score(y_tr_resample, y_pred) * 100
    precision = precision_score(y_tr_resample, y_pred, zero_division=0) * 100
    recall = recall_score(y_tr_resample, y_pred, zero_division=0) * 100
    f1 = f1_score(y_tr_resample, y_pred, zero_division=0) * 100
    roc_auc = roc_auc_score(y_tr_resample, y_pred_proba) * 100

    # Save Best Model
    model_path = os.path.join(model_dir, f"MODEL-{name}.pkl")
    joblib.dump(best_model, model_path)

    print(f"📦 {name} model saved at {model_path}")

    # Store Results
    results.append([name, accuracy, precision, recall, f1, roc_auc])


🔍 Tuning LGBM with GridSearchCV...
Fitting 10 folds for each of 2 candidates, totalling 20 fits
✅ Best Params for LGBM: {'bagging_fraction': 0.75, 'feature_fraction': 0.84, 'lambda_l1': 0.2, 'lambda_l2': 0.56, 'learning_rate': 0.3, 'max_depth': 3, 'min_data_in_leaf': 80, 'n_estimators': 100, 'num_leaves': 120}
📦 LGBM model saved at ../Models/Tuning\MODEL-LGBM.pkl

🔍 Tuning ETC with GridSearchCV...
Fitting 10 folds for each of 1 candidates, totalling 10 fits
✅ Best Params for ETC: {'bootstrap': True, 'criterion': 'gini', 'max_depth': 50, 'max_features': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 400}
📦 ETC model saved at ../Models/Tuning\MODEL-ETC.pkl

🔍 Tuning SVM with GridSearchCV...
Fitting 10 folds for each of 42 candidates, totalling 420 fits
✅ Best Params for SVM: {'C': 1000, 'gamma': 10, 'kernel': 'rbf'}
📦 SVM model saved at ../Models/Tuning\MODEL-SVM.pkl

🔍 Tuning GNB with GridSearchCV...
Fitting 10 folds for each of 950 candidates, totalling 9500 fits

In [27]:
print("\n📊 Final Evaluation Results:")
print("=" * 75)
print(f"{'Model':<10} {'Accuracy':<12} {'Precision':<12} {'Recall':<12} {'F1 Score':<12} {'ROC AUC':<12}")
print("=" * 75)
for res in results:
    print(f"{res[0]:<10} {res[1]:<12.4f} {res[2]:<12.4f} {res[3]:<12.4f} {res[4]:<12.4f} {res[5]:<12.4f}")
print("=" * 75)


📊 Final Evaluation Results:
Model      Accuracy     Precision    Recall       F1 Score     ROC AUC     
LGBM       99.9265      99.9836      99.8693      99.9264      99.9994     
ETC        99.9510      99.9836      99.9183      99.9510      99.9999     
SVM        100.0000     100.0000     100.0000     100.0000     100.0000    
GNB        94.8039      94.5854      95.0490      94.8166      97.7200     
CATBOOST   99.9673      100.0000     99.9346      99.9673      100.0000    
