In [3]:
import numpy as np
import pickle
import optuna
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_selection import SelectFromModel

In [4]:
# Load the preprocessed arrays. np.load on an .npz returns a lazy NpzFile that
# keeps the underlying archive open; reading inside a `with` block closes the
# file handle as soon as the needed arrays have been materialized.
# Note: after this block `data` refers to a closed archive, so any further
# arrays must be read here rather than in a later cell.
with np.load('../../Data/#1/processed_data.npz') as data:
    x_tr_resample = data['x_tr_resample']
    y_tr_resample = data['y_tr_resample']
    X_test = data['X_test']
    y_test = data['y_test']
    X_train = data['X_train']

# Load the saved normalization transformer.
# SECURITY: pickle.load executes arbitrary code embedded in the file; only load
# pickles produced by this project from a trusted location.
with open('../../Data/#1/power_transformer.pkl', 'rb') as f:
    norm = pickle.load(f)

# Normalize.
# NOTE(review): fit_transform re-fits the transformer that was just loaded, so
# any parameters stored in the pickle are overwritten by a fit on X_train.
# Presumably the pickle was fitted on this same X_train (then the result is
# unchanged and a plain norm.transform would suffice) -- confirm, and confirm
# that x_tr_resample was produced with the same normalization as is applied
# to X_test below.
norm_train_feature = norm.fit_transform(X_train)
norm_test_feature = norm.transform(X_test)

In [5]:
def objective(trial):
    """Optuna objective: mean 5-fold CV accuracy of an ExtraTreesClassifier.

    Draws one hyperparameter configuration from ``trial``, builds an
    ``ExtraTreesClassifier`` with it (``random_state`` fixed to 42), and scores
    it on the module-level ``x_tr_resample`` / ``y_tr_resample`` arrays with a
    shuffled, seeded ``StratifiedKFold``.

    Args:
        trial: The Optuna trial used to suggest hyperparameter values.

    Returns:
        The mean of the per-fold accuracy scores.
    """
    # The suggest_* calls are issued in a fixed order; keep it stable so a
    # seeded sampler yields the same sequence of configurations.
    n_estimators = trial.suggest_int('n_estimators', 100, 500)
    max_depth = trial.suggest_int('max_depth', 10, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 10)
    max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2', None])
    bootstrap = trial.suggest_categorical('bootstrap', [True, False])
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])

    model = ExtraTreesClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        bootstrap=bootstrap,
        criterion=criterion,
        random_state=42,
    )

    # Use Stratified K-Fold for validation.
    # NOTE(review): the training arrays are named "*_resample"; if they were
    # oversampled before this split, validation folds may contain samples
    # derived from training rows and the CV score would be optimistic -- verify.
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_accuracies = cross_val_score(
        model,
        x_tr_resample,
        y_tr_resample,
        cv=folds,
        scoring='accuracy',
    )

    return fold_accuracies.mean()

In [6]:
# Seed the sampler so that Restart & Run All reproduces the same sequence of
# suggested hyperparameters. Without an explicit sampler the study uses an
# unseeded one, so each run explores a different trajectory and can report
# different "best" parameters. TPESampler is Optuna's default single-objective
# sampler, so the search algorithm itself is unchanged.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction='maximize', sampler=sampler)
study.optimize(objective, n_trials=300)  # more trials for a wider search

# Print the best parameters
print("Best Hyperparameters:", study.best_params)

[I 2024-12-11 15:48:52,316] A new study created in memory with name: no-name-1f41bf8a-3bfb-4986-8036-5297e7c75e36
[I 2024-12-11 15:49:00,903] Trial 0 finished with value: 0.9806214227309893 and parameters: {'n_estimators': 304, 'max_depth': 12, 'min_samples_split': 7, 'min_samples_leaf': 1, 'max_features': None, 'bootstrap': True, 'criterion': 'gini'}. Best is trial 0 with value: 0.9806214227309893.
[I 2024-12-11 15:49:03,428] Trial 1 finished with value: 0.9778413736713001 and parameters: {'n_estimators': 144, 'max_depth': 46, 'min_samples_split': 18, 'min_samples_leaf': 2, 'max_features': 'sqrt', 'bootstrap': False, 'criterion': 'gini'}. Best is trial 0 with value: 0.9806214227309893.
[I 2024-12-11 15:49:10,032] Trial 2 finished with value: 0.9605069501226492 and parameters: {'n_estimators': 452, 'max_depth': 10, 'min_samples_split': 13, 'min_samples_leaf': 6, 'max_features': 'sqrt', 'bootstrap': True, 'criterion': 'gini'}. Best is trial 0 with value: 0.9806214227309893.
[I 2024-12-1

Best Hyperparameters: {'n_estimators': 309, 'max_depth': 23, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': None, 'bootstrap': False, 'criterion': 'entropy'}


In [7]:
# Rebuild the classifier from the best hyperparameters found by the study
# (same fixed seed as in the objective) and fit it on the full resampled
# training set.
best_clf = ExtraTreesClassifier(random_state=42, **study.best_params)
best_clf.fit(x_tr_resample, y_tr_resample)

In [8]:
# Predict on the normalized test features and score against the test labels.
y_pred = best_clf.predict(norm_test_feature)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the study's best mean cross-validated accuracy next to the test accuracy.
cv_line = "\nBest Cross-Validated Accuracy: {:.4f}".format(study.best_value)
test_line = f"Accuracy: {test_accuracy * 100:.2f}%"
print(cv_line, test_line, sep="\n")


Best Cross-Validated Accuracy: 0.9877
Accuracy: 97.77%
