<a href="https://colab.research.google.com/github/namoshi/ml_intro/blob/master/optuna_SVM_iris.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Optunaを用いたSVMのハイパーパラメータの最適化

In [16]:
!pip install optuna



In [17]:
import optuna
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# Data preparation: load iris and hold out 30% of the samples as a test split.
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,  # fixed seed so the split is the same on every run
)

# Objective function definition
def objective(trial):
    """Evaluate one SVM hyperparameter configuration proposed by Optuna.

    Two hyperparameters are sampled from ``trial``:

    * ``C``      -- float in [1e-10, 1e10], sampled on a log scale.
    * ``kernel`` -- one of "linear", "poly", "rbf", "sigmoid".

    An ``SVC`` built from them is scored with 5-fold cross-validation on
    ``X_train`` / ``y_train`` (module-level names); the test split is not
    used here.

    Returns:
        The mean of the five cross-validation scores.
    """
    # Search space. The suggest calls are made in the order C, then kernel.
    params = {
        "C": trial.suggest_float("C", 1e-10, 1e10, log=True),
        "kernel": trial.suggest_categorical("kernel", ["linear", "poly", "rbf", "sigmoid"]),
    }

    # cross_val_score fits a fresh clone of the estimator on each fold, so the
    # model does not need to be fitted beforehand.
    model = SVC(**params)
    fold_scores = cross_val_score(model, X_train, y_train, cv=5)

    return fold_scores.mean()

# Run the optimization.
# The sampler is seeded explicitly: without a seed every run explores a
# different sequence of trials, so the reported best parameters are not
# reproducible. TPESampler is the sampler create_study uses by default, so
# only the seeding changes, not the search algorithm.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective, n_trials=100)

# Show the best result. `trial` is kept as a module-level name because the
# following cell reads the tuned parameters from it.
print("Best trial:")
trial = study.best_trial
print(f"  Value: {trial.value}")
print(f"  Params: {trial.params}")

[I 2024-09-17 04:31:32,207] A new study created in memory with name: no-name-f29e6856-c718-45bc-9cf6-34f5f14d302d
[I 2024-09-17 04:31:32,235] Trial 0 finished with value: 0.9523809523809523 and parameters: {'C': 190.15907139670773, 'kernel': 'rbf'}. Best is trial 0 with value: 0.9523809523809523.
[I 2024-09-17 04:31:32,266] Trial 1 finished with value: 0.9333333333333333 and parameters: {'C': 95420214.81618288, 'kernel': 'rbf'}. Best is trial 0 with value: 0.9523809523809523.
[I 2024-09-17 04:31:32,297] Trial 2 finished with value: 0.30476190476190473 and parameters: {'C': 0.005213504913554304, 'kernel': 'sigmoid'}. Best is trial 0 with value: 0.9523809523809523.
[I 2024-09-17 04:31:32,325] Trial 3 finished with value: 0.30476190476190473 and parameters: {'C': 0.0016434816902267605, 'kernel': 'sigmoid'}. Best is trial 0 with value: 0.9523809523809523.
[I 2024-09-17 04:31:32,352] Trial 4 finished with value: 0.9523809523809523 and parameters: {'C': 4330.856473501024, 'kernel': 'linear'}

Best trial:
  Value: 0.9714285714285715
  Params: {'C': 3.0822459433863028, 'kernel': 'linear'}


In [18]:
# Pull the tuned hyperparameters out of the best trial and report them.
C_opt, kernel_opt = trial.params['C'], trial.params['kernel']
print('Best C is ', C_opt)
print('Best kernel is ', kernel_opt)

# Refit on the full training split with the tuned settings
# (fit returns the estimator itself, so the call can be chained).
svm = SVC(C=C_opt, kernel=kernel_opt).fit(X_train, y_train)

# Test Accuracy: score on the held-out split, which the search never used.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Test Accuracy = ', accuracy)


Best C is  3.0822459433863028
Best kernel is  linear
Test Accuracy =  1.0
