<a href="https://colab.research.google.com/github/psaw/hse-ai24-ml/blob/main/Optuna_screencast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Общий алгоритм работы с Optuna

In [None]:
!pip install optuna -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m365.7/365.7 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.3/212.3 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h

1. Определяем целевую функцию objective; в качестве аргумента она получает специальный объект trial. С его помощью можно задавать различные гиперпараметры. Например, в примере ниже мы задаём x в интервале [-10, 10].

2. Далее создаём объект исследования (study) с помощью функции optuna.create_study.

3. Запускаем оптимизацию целевой функции objective на 40 итераций (n_trials=40). Происходит 40 вызовов нашей функции с различными значениями x от -10 до 10. Как именно optuna выбирает значения параметров, будет описано ниже.

In [None]:
import optuna

def objective(trial):
    """Toy objective: squared distance between the sampled ``x`` and 2.

    ``trial`` supplies ``x`` through ``suggest_float`` with bounds -10 and 10;
    the function returns ``(x - 2) ** 2``.
    """
    candidate = trial.suggest_float('x', -10, 10)
    offset = candidate - 2
    return offset ** 2

# No direction is given, so the study minimizes the objective (Optuna's default).
# The sampler is seeded explicitly: without a seed every run draws a different
# sequence of trials and the results cannot be reproduced. TPESampler is the
# sampler Optuna uses by default, so only the seeding changes.
study = optuna.create_study(sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=40)

# Last expression of the cell: parameters of the best trial.
study.best_params

[32m[I 2023-04-19 18:47:54,785][0m A new study created in memory with name: no-name-f96dfb93-34ed-49ce-84f0-5d3246ad513c[0m
[32m[I 2023-04-19 18:47:54,802][0m Trial 0 finished with value: 18.45675720414344 and parameters: {'x': 6.296132819658098}. Best is trial 0 with value: 18.45675720414344.[0m
[32m[I 2023-04-19 18:47:54,809][0m Trial 1 finished with value: 24.75951211512401 and parameters: {'x': -2.975893097236315}. Best is trial 0 with value: 18.45675720414344.[0m
[32m[I 2023-04-19 18:47:54,837][0m Trial 2 finished with value: 32.76221902653796 and parameters: {'x': 7.723829052875178}. Best is trial 0 with value: 18.45675720414344.[0m
[32m[I 2023-04-19 18:47:54,842][0m Trial 3 finished with value: 64.44069057392396 and parameters: {'x': -6.027495909305962}. Best is trial 0 with value: 18.45675720414344.[0m
[32m[I 2023-04-19 18:47:54,849][0m Trial 4 finished with value: 51.91574732535013 and parameters: {'x': -5.205258310799838}. Best is trial 0 with value: 18.45675

{'x': 1.991323472944153}

## Загрузка данных и импорт библиотек

In [None]:
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import r2_score

from sklearn.datasets import fetch_california_housing

In [None]:
# Seed for the randomized steps of the notebook (passed to train_test_split
# as random_state).
RANDOM_STATE = 42

In [None]:
!pip install lightgbm -q

In [None]:
from lightgbm import LGBMRegressor

In [None]:
# Load the California housing dataset; as_frame=True is requested so that the
# features and the target come back as pandas objects.
data = fetch_california_housing(as_frame=True)

# Feature matrix and regression target.
X, y = data.data, data.target

In [None]:
# Hold out 25% of the rows as a test set; the split is seeded for repeatability.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=RANDOM_STATE,
)

## Подбор гиперпараметров с Optuna

Разобьём данные на тренировочную и тестовую части. На тренировочной части по кросс-валидации подберём гиперпараметры модели, а затем проверим качество на тестовой части.

In [None]:
def objective_lgbm(trial):
    """Optuna objective: mean 3-fold cross-validated R^2 of an LGBMRegressor.

    Three hyperparameters are sampled from ``trial``:
    ``max_depth`` (int, 2..20), ``learning_rate`` (float, 1e-5..1 on a log
    scale) and ``n_estimators`` (int, 10..1000). The model is scored on
    ``Xtrain``/``ytrain`` only, and the mean of the fold scores is returned.
    """
    params = {
        "max_depth": trial.suggest_int("max_depth", 2, 20),
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1, log=True),
        "n_estimators": trial.suggest_int("n_estimators", 10, 1000),
    }
    estimator = LGBMRegressor(**params)

    # cv=3 with scoring='r2' yields one R^2 value per fold.
    fold_scores = cross_val_score(
        estimator, Xtrain, ytrain, cv=3, scoring='r2', n_jobs=-1
    )
    return fold_scores.mean()


# The objective is an R^2 score, so the study maximizes it.
# The sampler is seeded with RANDOM_STATE: without a seed every run explores a
# different sequence of hyperparameters and the search cannot be reproduced.
# TPESampler is the sampler Optuna uses by default, so only the seeding changes.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
)
study.optimize(objective_lgbm, n_trials=30)

[32m[I 2023-04-19 18:50:23,367][0m A new study created in memory with name: no-name-3bb37c17-e0e3-4d16-8831-90301daf80b4[0m
[32m[I 2023-04-19 18:50:27,118][0m Trial 0 finished with value: 0.8171232497662545 and parameters: {'max_depth': 20, 'learning_rate': 0.038605092793381104, 'n_estimators': 129}. Best is trial 0 with value: 0.8171232497662545.[0m
[32m[I 2023-04-19 18:50:32,280][0m Trial 1 finished with value: 0.8423702466775841 and parameters: {'max_depth': 13, 'learning_rate': 0.04232599692911454, 'n_estimators': 974}. Best is trial 1 with value: 0.8423702466775841.[0m
[32m[I 2023-04-19 18:50:37,546][0m Trial 2 finished with value: 0.056222277804752606 and parameters: {'max_depth': 5, 'learning_rate': 6.500600015349346e-05, 'n_estimators': 730}. Best is trial 1 with value: 0.8423702466775841.[0m
[32m[I 2023-04-19 18:50:39,548][0m Trial 3 finished with value: 0.27458346288125873 and parameters: {'max_depth': 3, 'learning_rate': 0.0003971906418508838, 'n_estimators': 8

In [None]:
# Hyperparameter values of the best trial found by the study.
study.best_params

{'max_depth': 13, 'learning_rate': 0.04232599692911454, 'n_estimators': 974}

In [None]:
# Refit on the whole training split with the best hyperparameters from the study.
model = LGBMRegressor(**study.best_params).fit(Xtrain, ytrain)

# Score the refitted model on the held-out test split.
pred = model.predict(Xtest)
r2_score(y_true=ytest, y_pred=pred)

0.855095804413975