In [1]:
import pandas as pd
import numpy as np

from utils import reduce_mem_usage

train = pd.read_csv('../input/ion-switching-3-feature-engineering/train_features.csv')

X, y = train.drop(['open_channels', 'time'], axis=1), train['open_channels']

In [3]:
X = reduce_mem_usage(X)

Mem. usage decreased to 430.11 Mb (66.9% reduction)


In [4]:
import optuna

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

from lightgbm import LGBMClassifier

def objective(trial):
    '''
    optuna objective function
    '''
    
    n_estimators = trial.suggest_int('n_estimators', 10, 100)
    num_leaves = trial.suggest_int('num_leaves', 20, 300)
    learning_rate = trial.suggest_loguniform('learning_rate', 0.01, 0.2)
    reg_alpha = trial.suggest_loguniform('reg_alpha', 0.01, 5)
    reg_lambda = trial.suggest_loguniform('reg_lambda', 0.01, 5)
    max_depth = trial.suggest_int('max_depth', 8, 30)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2)
    
    lgbm = LGBMClassifier(n_estimators=n_estimators,
                          learning_rate=learning_rate,
                          reg_alpha=reg_alpha,
                          reg_lambda=reg_lambda,
                          max_depth=max_depth,
                          num_leaves=num_leaves,
                          random_state=0)
    
    lgbm.fit(X_train, y_train)
    
    preds = lgbm.predict(X_test)
    
    score = f1_score(y_test, preds, average='macro')
    
    # to maximize the score minimize the negative of the score
    return -score

study = optuna.create_study()
study.optimize(objective, n_trials=20)

study.best_params

[32m[I 2021-01-27 12:56:40,186][0m A new study created in memory with name: no-name-60fe3ee4-7bf3-4100-ac06-1a093354b5ea[0m
[32m[I 2021-01-27 13:01:44,228][0m Trial 0 finished with value: -0.9401391497842877 and parameters: {'n_estimators': 64, 'num_leaves': 229, 'learning_rate': 0.012942224027402692, 'reg_alpha': 0.015339206496057927, 'reg_lambda': 1.0224853815960262, 'max_depth': 14}. Best is trial 0 with value: -0.9401391497842877.[0m
[32m[I 2021-01-27 13:07:05,097][0m Trial 1 finished with value: -0.9402459039195699 and parameters: {'n_estimators': 73, 'num_leaves': 93, 'learning_rate': 0.05011637258507193, 'reg_alpha': 0.05118227683177001, 'reg_lambda': 2.2550042799753784, 'max_depth': 14}. Best is trial 1 with value: -0.9402459039195699.[0m
[32m[I 2021-01-27 13:09:18,802][0m Trial 2 finished with value: -0.939866442338878 and parameters: {'n_estimators': 27, 'num_leaves': 241, 'learning_rate': 0.02559130096983619, 'reg_alpha': 0.13625482789182514, 'reg_lambda': 0.04627

{'n_estimators': 56,
 'num_leaves': 135,
 'learning_rate': 0.0291531383248232,
 'reg_alpha': 1.4994012659353708,
 'reg_lambda': 0.3805273375983891,
 'max_depth': 30}