In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 4.1 MB/s 
Collecting alembic
  Downloading alembic-1.7.5-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 99.7 MB/s 
Collecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting cliff
  Downloading cliff-3.10.0-py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 14.1 MB/s 
[?25hCollecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting Mako
  Downloading Mako-1.1.6-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 5.9 MB/s 
[?25hCollecting cmd2>=1.0.0
  Downloading cmd2-2.3.3-py3-none-any.whl (149 kB)
[K     |████████████████████████████████| 149 kB 79.6 MB/s 
[?25hCollecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.8.0-py2.py3-none-any.whl (112 kB)
[K     |████████████████████████████████| 112 kB 77.4 MB/s 
Collecting stevedore>=2.0

In [2]:
from google.colab import drive

drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
!nvidia-smi

Fri Jan 21 17:31:57 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score, roc_curve

from sklearn.preprocessing import RobustScaler
from tqdm import tqdm

import optuna
import gc

In [5]:
train = pd.read_csv('/content/gdrive/MyDrive/song-popularity-prediction/train.csv')
test = pd.read_csv('/content/gdrive/MyDrive/song-popularity-prediction/test.csv')
columns = [col for col in train.columns.to_list() if col not in ['id','song_popularity']]
data=train[columns]
target=train['song_popularity']

In [6]:
target

0        0
1        1
2        0
3        0
4        0
        ..
39995    0
39996    0
39997    1
39998    0
39999    0
Name: song_popularity, Length: 40000, dtype: int64

In [None]:
    "task": "train",
    'subsample': 0.95312,
    'learning_rate': 0.001635,
    "max_depth": 3,
    "feature_fraction": 0.2256038826485174,############
    "bagging_fraction": 0.7705303688019942,###########
    "min_child_samples": 290,#################
    "reg_alpha": 14.68267919457715,################
    "reg_lambda": 66.156,######################
    "max_bin": 772,#######################
    "min_data_per_group": 177,################
    "bagging_freq": 1,#################
    "cat_smooth": 96,######################
    "cat_l2": 17,###########################
    "verbosity": -1,
    'random_state':42,
    'colsample_bytree':0.1107

In [11]:
from optuna.integration import LightGBMPruningCallback
from sklearn.metrics import log_loss
import lightgbm as lgb
import sklearn
def objective(trial):
  train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.15,random_state=42)
  dtrain = lgb.Dataset(train_x, label=train_y)

  params = {
        "objective": trial.suggest_categorical('objective',['binary']),
        "verbosity": -1,
        "boosting_type": "gbdt",
        "subsample": trial.suggest_uniform('subsample', 0.4, 1.0),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001,1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.1,1.0),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 0.001, 10.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 0.001, 10.0),
        'cat_smooth' : trial.suggest_int('min_data_per_groups', 1, 100),
        'max_depth': trial.suggest_int('max_depth',1,100),
        
        'num_leaves' : trial.suggest_int('num_leaves', 2, 1000),

        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "eval_metric": trial.suggest_categorical('eval_metric',['mlogloss']),
    }
  gbm = lgb.train(params, dtrain)
  
  
  preds = gbm.predict(test_x)
  pred_labels = np.rint(preds)
  accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
  return accuracy
      

In [12]:
study = optuna.create_study(direction='maximize',study_name='song-pop_trials_200')
study.optimize(objective, n_trials=200)

[32m[I 2022-01-21 17:44:11,256][0m A new study created in memory with name: song-pop_trials_200[0m
[32m[I 2022-01-21 17:44:11,919][0m Trial 0 finished with value: 0.639 and parameters: {'objective': 'binary', 'subsample': 0.5451236893762632, 'learning_rate': 0.017040955958507804, 'colsample_bytree': 0.6552448981930782, 'reg_alpha': 3.956607668995326, 'reg_lambda': 0.001466839132897225, 'min_data_per_groups': 83, 'max_depth': 54, 'num_leaves': 440, 'feature_fraction': 0.44215732633806865, 'bagging_fraction': 0.5758411405204902, 'bagging_freq': 5, 'min_child_samples': 83, 'eval_metric': 'mlogloss'}. Best is trial 0 with value: 0.639.[0m
[32m[I 2022-01-21 17:44:13,213][0m Trial 1 finished with value: 0.5996666666666667 and parameters: {'objective': 'binary', 'subsample': 0.8433694424664837, 'learning_rate': 0.35544895146463984, 'colsample_bytree': 0.68668514165626, 'reg_alpha': 1.1061362941915747, 'reg_lambda': 5.4535951209416655, 'min_data_per_groups': 64, 'max_depth': 24, 'num_l

In [13]:
study.best_params

{'bagging_fraction': 0.9079251000817714,
 'bagging_freq': 3,
 'colsample_bytree': 0.40903081974813305,
 'eval_metric': 'mlogloss',
 'feature_fraction': 0.48347153979334523,
 'learning_rate': 0.02656122490275354,
 'max_depth': 80,
 'min_child_samples': 76,
 'min_data_per_groups': 8,
 'num_leaves': 144,
 'objective': 'binary',
 'reg_alpha': 1.1540446013241468,
 'reg_lambda': 0.001985732140307453,
 'subsample': 0.684709082226984}

In [14]:
study.best_value

0.6428333333333334