In [1]:
!pip install optuna-integration

Collecting optuna-integration
  Downloading optuna_integration-4.1.0-py3-none-any.whl.metadata (12 kB)
Downloading optuna_integration-4.1.0-py3-none-any.whl (97 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.4/97.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: optuna-integration
Successfully installed optuna-integration-4.1.0


In [2]:
import numpy as np
import optuna
import pandas as pd
import optuna.integration.lightgbm as lgbo
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split, GridSearchCV
import lightgbm as lgb
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer, KNNImputer

In [3]:
train = pd.read_csv("/kaggle/input/child-mind/train.csv")
target_col='sii'
X = train.drop([target_col], axis=1)
y = train[target_col]


In [4]:
imputer = KNNImputer(n_neighbors=5)
X_imputer = imputer.fit_transform(X)
X = pd.DataFrame(X_imputer, columns=X.columns)

In [5]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
y_train

247     1.0
2488    0.0
2318    0.0
347     2.0
1090    2.0
       ... 
1638    0.0
1095    0.0
1130    0.0
1294    0.0
860     1.0
Name: sii, Length: 2188, dtype: float64

In [7]:
params2 = {'learning_rate': 0.057885574535498495,
 'lambda_l1': 2.08185822086368e-05,
 'lambda_l2': 4.538468316534467,
 'num_leaves': 209,
 'feature_fraction': 0.930147922801105,
 'bagging_fraction': 0.6831325500550711,
 'bagging_freq': 1,
 'min_child_samples': 37}


In [8]:
import optuna
import lightgbm as lgb
from sklearn.metrics import cohen_kappa_score, make_scorer

def quadratic_weighted_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true, y_pred, weights='quadratic')

qwk_scorer = make_scorer(quadratic_weighted_kappa, greater_is_better=True)

def objective(trial):
    # Suggest hyperparameters
    param = {
        "objective": "multiclass",
        "num_class": 4,  # since sii has 4 classes
        "metric": "multi_logloss",  # can optimize different metrics
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True)
    }

    # Create datasets for LightGBM
    dtrain = lgb.Dataset(X_train, label=y_train)
    dval = lgb.Dataset(X_val, label=y_val, reference=dtrain)

    # Train model with early stopping
    gbm = lgb.train(
        param,
        dtrain,
        num_boost_round=1000,
        valid_sets=[dval],
    )

    # Predict on validation set
    y_pred = gbm.predict(X_val)
    # Convert probabilities to predicted classes
    y_pred_classes = y_pred.argmax(axis=1)

    # Calculate QWK
    score = quadratic_weighted_kappa(y_val, y_pred_classes)
    return score

# Optimize with Optuna, aiming to maximize QWK
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

print("Best Value:", study.best_value)
print("Best Params:", study.best_params)


[I 2024-12-17 19:37:05,475] A new study created in memory with name: no-name-1b62d235-59a4-4d83-b2dc-434b9831c331
[I 2024-12-17 19:37:31,478] Trial 0 finished with value: 0.3066801619433198 and parameters: {'lambda_l1': 0.6401706809181154, 'lambda_l2': 5.672425248257563e-05, 'num_leaves': 33, 'feature_fraction': 0.7586564901564246, 'bagging_fraction': 0.43894620863816314, 'bagging_freq': 3, 'min_child_samples': 17, 'learning_rate': 0.03384580966107191}. Best is trial 0 with value: 0.3066801619433198.
[I 2024-12-17 19:38:27,477] Trial 1 finished with value: 0.3244205796942634 and parameters: {'lambda_l1': 5.344642525008196e-05, 'lambda_l2': 0.02173946046387079, 'num_leaves': 240, 'feature_fraction': 0.8206919437562326, 'bagging_fraction': 0.9520211377133625, 'bagging_freq': 4, 'min_child_samples': 25, 'learning_rate': 0.010363032468271678}. Best is trial 1 with value: 0.3244205796942634.
[I 2024-12-17 19:39:15,288] Trial 2 finished with value: 0.29238251815317173 and parameters: {'lambd

Best Value: 0.389738830703964
Best Params: {'lambda_l1': 6.867958298215203, 'lambda_l2': 0.0010621982857393474, 'num_leaves': 55, 'feature_fraction': 0.5053798017764384, 'bagging_fraction': 0.8712517983148399, 'bagging_freq': 3, 'min_child_samples': 62, 'learning_rate': 0.060063227417318364}


In [9]:
params3 = {'lambda_l1': 2.9209409268664883,
 'lambda_l2': 0.00012976118112242174,
 'num_leaves': 202,
 'feature_fraction': 0.8038056112369449,
 'bagging_fraction': 0.6991108077097957,
 'bagging_freq': 3,
 "learning_rate": 0.05,
 'min_child_samples': 85}

In [10]:
import catboost as cb
from sklearn.metrics import mean_squared_error

def objective_cat(trial):
    params = {
        "iterations": 1000,
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 10),
        "subsample": trial.suggest_float("subsample", 0.05, 1.0),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.05, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100),
    }

    model = cb.CatBoostRegressor(**params, silent=True)
    model.fit(X_train, y_train)
    predictions = model.predict(X_val)
    rmse = mean_squared_error(y_val, predictions, squared=False)
    return rmse

In [11]:
study = optuna.create_study(direction='minimize')
study.optimize(objective_cat, n_trials=50)

[I 2024-12-17 19:51:43,097] A new study created in memory with name: no-name-dc2302a6-3608-4482-9634-f39cdbc94ee0
[I 2024-12-17 19:51:45,159] Trial 0 finished with value: 0.6756232937062647 and parameters: {'learning_rate': 0.00785958003589479, 'depth': 4, 'subsample': 0.7028937254444567, 'colsample_bylevel': 0.22226500375869712, 'min_data_in_leaf': 83}. Best is trial 0 with value: 0.6756232937062647.
[I 2024-12-17 19:52:37,573] Trial 1 finished with value: 0.6804645416296724 and parameters: {'learning_rate': 0.004531586040868085, 'depth': 9, 'subsample': 0.6086580540179518, 'colsample_bylevel': 0.2478319621799746, 'min_data_in_leaf': 70}. Best is trial 0 with value: 0.6756232937062647.
[I 2024-12-17 19:53:01,135] Trial 2 finished with value: 0.6948838994529288 and parameters: {'learning_rate': 0.09852921141596002, 'depth': 7, 'subsample': 0.3103716923346792, 'colsample_bylevel': 0.42081977890096633, 'min_data_in_leaf': 59}. Best is trial 0 with value: 0.6756232937062647.
[I 2024-12-17

In [12]:
study.best_params

{'learning_rate': 0.017747581822839028,
 'depth': 5,
 'subsample': 0.5324952077642681,
 'colsample_bylevel': 0.8180498009117981,
 'min_data_in_leaf': 15}

In [13]:
def objective_xgb(trial):
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dvalid = xgb.DMatrix(X_val, label=y_val)

    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        # use exact for small dataset.
        "tree_method": "approx",
        # defines booster, gblinear for linear functions.
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        # L2 regularization weight.
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        # L1 regularization weight.
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        # sampling ratio for training data.
        "subsample": trial.suggest_float("subsample", 0.2, 1.0),
        # sampling according to each tree.
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
        "learning_rate": 0.05
    }

    if param["booster"] in ["gbtree", "dart"]:
        # maximum depth of the tree, signifies complexity of the tree.
        param["max_depth"] = trial.suggest_int("max_depth", 3, 9, step=2)
        # minimum child weight, larger the term more conservative the tree.
        param["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        # defines how selective algorithm is.
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])

    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    bst = xgb.train(param, dtrain)
    preds = bst.predict(dvalid)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(y_val, pred_labels)
    return accuracy

# Create a study that tries to maximize accuracy
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

print("Number of finished trials:", len(study.trials))
print("Best trial:")
best_trial = study.best_trial
print("  Value:", best_trial.value)
print("  Params:")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2024-12-17 20:05:38,387] A new study created in memory with name: no-name-5ee7c393-2c55-4b3c-a7ad-69c2d28beae2
[I 2024-12-17 20:05:51,379] Trial 0 finished with value: 0.34204328958509733 and parameters: {'lambda_l1': 4.718459997855032, 'lambda_l2': 1.0322466064058957, 'num_leaves': 239, 'feature_fraction': 0.7686115949175772, 'bagging_fraction': 0.47891443415760887, 'bagging_freq': 6, 'min_child_samples': 44, 'learning_rate': 0.02229513592166741}. Best is trial 0 with value: 0.34204328958509733.
[I 2024-12-17 20:06:01,424] Trial 1 finished with value: 0.34751401969465234 and parameters: {'lambda_l1': 0.407883355496291, 'lambda_l2': 2.2097848501210655, 'num_leaves': 213, 'feature_fraction': 0.8163751390051828, 'bagging_fraction': 0.4750133723151295, 'bagging_freq': 1, 'min_child_samples': 83, 'learning_rate': 0.05729477460279862}. Best is trial 1 with value: 0.34751401969465234.
[I 2024-12-17 20:06:24,924] Trial 2 finished with value: 0.2793752697453603 and parameters: {'lambda_l1':

Number of finished trials: 100
Best trial:
  Value: 0.4272755955197981
  Params:
    lambda_l1: 0.3958754953528503
    lambda_l2: 0.0004916199597661849
    num_leaves: 2
    feature_fraction: 0.7934382188979396
    bagging_fraction: 0.5165364918441219
    bagging_freq: 6
    min_child_samples: 71
    learning_rate: 0.016561741947618006


In [14]:
study.best_params

{'lambda_l1': 0.3958754953528503,
 'lambda_l2': 0.0004916199597661849,
 'num_leaves': 2,
 'feature_fraction': 0.7934382188979396,
 'bagging_fraction': 0.5165364918441219,
 'bagging_freq': 6,
 'min_child_samples': 71,
 'learning_rate': 0.016561741947618006}

In [15]:
study.best_params

{'lambda_l1': 0.3958754953528503,
 'lambda_l2': 0.0004916199597661849,
 'num_leaves': 2,
 'feature_fraction': 0.7934382188979396,
 'bagging_fraction': 0.5165364918441219,
 'bagging_freq': 6,
 'min_child_samples': 71,
 'learning_rate': 0.016561741947618006}

In [16]:
def objective_ridge(trial):
    # Load dataset
    # Suggest hyperparameters
    alpha = trial.suggest_float ("alpha", 0.0, 1.0)
    # Train and evaluate model
    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    score = mean_squared_error(y_val, y_val)
    return score

In [17]:
# Create a study object
study = optuna.create_study(direction="minimize")

# Optimize the objective function
study.optimize(objective, n_trials=50)

[I 2024-12-17 20:29:04,570] A new study created in memory with name: no-name-6202bd3f-5a44-4a5e-88c2-e6600b666fda
[I 2024-12-17 20:29:28,187] Trial 0 finished with value: 0.30128447298828864 and parameters: {'lambda_l1': 1.807036856812152e-06, 'lambda_l2': 0.00016406690014671014, 'num_leaves': 72, 'feature_fraction': 0.5965549901115533, 'bagging_fraction': 0.9338399634333298, 'bagging_freq': 2, 'min_child_samples': 75, 'learning_rate': 0.07149787048839838}. Best is trial 0 with value: 0.30128447298828864.
[I 2024-12-17 20:29:49,322] Trial 1 finished with value: 0.30456852791878164 and parameters: {'lambda_l1': 4.0955798221336083e-07, 'lambda_l2': 0.017221614774990474, 'num_leaves': 182, 'feature_fraction': 0.6422700054964624, 'bagging_fraction': 0.9753467208698285, 'bagging_freq': 1, 'min_child_samples': 91, 'learning_rate': 0.07894012800017834}. Best is trial 0 with value: 0.30128447298828864.
[I 2024-12-17 20:30:09,457] Trial 2 finished with value: 0.3680304853590053 and parameters: 