# Overview Materi

Jelaskan secara singkat perbedaan antara Grid Search CV, Randomized Search CV, Bayesian Search CV, dan Optuna menurut pemahamanmu.

source: https://www.youtube.com/watch?v=t-INgABWULw

# Import Data & Libraries

In [26]:
# jalankan hanya sekali
!pip install optuna -q

In [27]:
# import library yang dibutuhkan di sini
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import optuna

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [28]:
# Load the iris dataset bundled with seaborn and preview its first 35 rows.
df = sns.load_dataset(name='iris')
df.head(n=35)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


# Data Preprocessing

In [29]:
# Check class balance: count how many rows belong to each species.
species_counts = df['species'].value_counts()
species_counts

Unnamed: 0_level_0,count
species,Unnamed: 1_level_1
setosa,50
versicolor,50
virginica,50


In [30]:
# Convert the categorical species labels to integer class codes.
species_codes = {'setosa': 0, 'versicolor': 1, 'virginica': 2}

# Guard so the cell is safe to re-run: mapping an already-encoded column again
# would turn every value into NaN, because 0/1/2 are not keys of the mapping.
if not pd.api.types.is_numeric_dtype(df['species']):
    df['species'] = df['species'].map(species_codes)

# Any label missing from the mapping would have become NaN; fail loudly here.
assert df['species'].notna().all(), "unmapped species label found"
df.head()


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [31]:
# Separate the target column from the predictor columns.
target_col = 'species'
y = df[target_col]
X = df.drop(columns=[target_col])

# Dataset Splitting

In [32]:
# Hold out 20% of the rows for testing (80:20 split); the fixed seed keeps
# the split identical across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Base Model Random Forest

In [33]:
# Baseline: random forest classifier with default hyperparameters.
# random_state is fixed so the baseline is reproducible and uses the same seed
# as the tuned model it is later compared against.
rfr = RandomForestClassifier(random_state=42)
rfr.fit(X_train, y_train)

In [34]:
# Predict the test-set classes with the baseline model.
y_pred = rfr.predict(X=X_test)

In [35]:
# Compute the baseline scores first, then report them together.
# Precision, recall and F1 are weighted averages over the classes.
base_accuracy = accuracy_score(y_test, y_pred)
base_precision = precision_score(y_test, y_pred, average='weighted')
base_recall = recall_score(y_test, y_pred, average='weighted')
base_f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {base_accuracy:.3f}")
print(f"Precision: {base_precision:.3f}")
print(f"Recall: {base_recall:.3f}")
print(f"F1 Score: {base_f1:.3f}")
print(classification_report(y_test, y_pred))

Accuracy: 1.000
Precision: 1.000
Recall: 1.000
F1 Score: 1.000
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



# Optuna

In [36]:
from sklearn.model_selection import cross_val_score

def objective(trial):
    """Score one Optuna trial of random forest hyperparameters.

    Samples n_estimators, max_depth, min_samples_split and min_samples_leaf
    from ``trial``, builds a RandomForestClassifier with them, and returns the
    mean 5-fold cross-validated accuracy on X_train / y_train.
    """
    # The dict literal is evaluated top to bottom, so the parameters are
    # requested from the trial in this fixed order.
    sampled = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 2, 50),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 32),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 32),
    }

    forest = RandomForestClassifier(random_state=42, **sampled)

    fold_scores = cross_val_score(
        forest,
        X_train,
        y_train,
        n_jobs=-1,
        cv=5,
        scoring='accuracy',
    )
    return fold_scores.mean()

Hyperparameter yang di-tuning disesuaikan dengan algoritma yang digunakan. Kali ini kita menggunakan Random Forest, sehingga hyperparameter yang kita pilih adalah *n_estimators*, *max_depth*, *min_samples_split*, dan *min_samples_leaf*.

In [37]:
# Create the study that maximizes the objective's return value.
# The sampler is seeded so the sequence of suggested hyperparameters
# is reproducible across runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)

[I 2025-10-04 15:48:15,139] A new study created in memory with name: no-name-5c8ec9ef-bdc4-4e6c-aab0-c26ab0708d4a


In [38]:
# Number of hyperparameter combinations to evaluate.
N_TRIALS = 100
study.optimize(func=objective, n_trials=N_TRIALS)

[I 2025-10-04 15:48:25,960] Trial 0 finished with value: 0.8666666666666666 and parameters: {'n_estimators': 317, 'max_depth': 50, 'min_samples_split': 11, 'min_samples_leaf': 23}. Best is trial 0 with value: 0.8666666666666666.
[I 2025-10-04 15:48:34,353] Trial 1 finished with value: 0.7 and parameters: {'n_estimators': 542, 'max_depth': 28, 'min_samples_split': 5, 'min_samples_leaf': 29}. Best is trial 0 with value: 0.8666666666666666.
[W 2025-10-04 15:48:43,847] Trial 2 failed with parameters: {'n_estimators': 989, 'max_depth': 3, 'min_samples_split': 8, 'min_samples_leaf': 24} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipython-input-3461050660.py", line 17, in objective
    score = cross_val_score(model, X_train, y_train, n_jobs=-1, cv=5, scoring='accuracy')
 

KeyboardInterrupt: 

This may take a while, so please be patient.
<br>
The recommended setting for `n_trials` is 100: there seems to be no significant score improvement beyond 100 trials, and running more trials is inefficient because the waiting time becomes quite long.

In [None]:
# Show the hyperparameters of the best trial found by the study.
study.best_trial.params

Berikut hasil hyperparameter tuning dari Optuna

In [None]:
# Inspect the hyperparameter tuning result from Optuna.
best_params = dict(study.best_params)
best_params

# Random Forest Using Optuna

In [None]:
# Store the best hyperparameters in a new variable.
best_params = {name: value for name, value in study.best_params.items()}

In [None]:
# Rebuild the random forest from the four tuned hyperparameters, using the
# same seed as during tuning.
tuned_keys = ('n_estimators', 'max_depth', 'min_samples_split', 'min_samples_leaf')
tuned_kwargs = {key: best_params[key] for key in tuned_keys}

best_model = RandomForestClassifier(random_state=42, **tuned_kwargs)

best_model.fit(X_train, y_train)

In [None]:
# Predict the test-set classes with the tuned model.
y_pred = best_model.predict(X=X_test)

In [None]:
# Compute the tuned model's scores first, then report them together.
# Precision, recall and F1 are weighted averages over the classes.
tuned_accuracy = accuracy_score(y_test, y_pred)
tuned_precision = precision_score(y_test, y_pred, average='weighted')
tuned_recall = recall_score(y_test, y_pred, average='weighted')
tuned_f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {tuned_accuracy:.3f}")
print(f"Precision: {tuned_precision:.3f}")
print(f"Recall: {tuned_recall:.3f}")
print(f"F1 Score: {tuned_f1:.3f}")
print(classification_report(y_test, y_pred))

Tidak terdapat kenaikan skor dibandingkan dengan sebelum menggunakan Optuna, sebab skor yang dihasilkan oleh base model saja sudah sangat baik (akurasi 1.000 pada data uji).