In [4]:
# Installing required library
!pip install -q optuna

[K     |████████████████████████████████| 204kB 2.8MB/s 
[K     |████████████████████████████████| 1.1MB 11.4MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
[K     |████████████████████████████████| 81kB 6.4MB/s 
[K     |████████████████████████████████| 81kB 7.0MB/s 
[K     |████████████████████████████████| 51kB 5.2MB/s 
[K     |████████████████████████████████| 112kB 20.3MB/s 
[K     |████████████████████████████████| 122kB 17.3MB/s 
[?25h  Building wheel for alembic (PEP 517) ... [?25l[?25hdone
  Building wheel for optuna (setup.py) ... [?25l[?25hdone
  Building wheel for pyperclip (setup.py) ... [?25l[?25hdone


In [29]:
Importing required metadata
import pandas as pd 
from sklearn import ensemble
from sklearn import linear_model
from tqdm import tqdm
import optuna
import lightgbm as lgbm
from optuna.samplers import TPESampler
from sklearn import model_selection
from sklearn import metrics
import numpy as np

In [7]:
# Google colab specific
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
# Google Colab Specific
from google.colab import files
src = list(files.upload().values())[0]
open('cross_val.py','wb').write(src)
from cross_val import cross_val

Saving cross_val.py to cross_val (2).py


In [27]:
# Objective function Hyperparameter Tuning
def objective(trial):    
    param = {
        'objective': 'regression',
        'metric': 'mean_absolute_error',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'num_leaves': trial.suggest_int('num_leaves', 2, 512),
        'learning_rate': 0.01,
        'n_estimators': trial.suggest_int('n_estimators', 700, 3000),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
    }
    lgbm_regr = lgbm.LGBMRegressor(**param)
    gbm_2 = lgbm_regr.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], verbose=False)
    return metrics.mean_absolute_error(np.expm1(y_valid), np.expm1(gbm_2.predict(X_valid)))

In [35]:
if __name__=='__main__':
    # Read Data Here
    df = pd.read_csv('/content/drive/My Drive/CrossValidation_And_HyperParam/train.csv')
    # Shuffle the data
    df = df.sample(frac=1).reset_index(drop=True)

    # Split the data into dependent and independent variable
    y = df.price_range.values
    X = df.drop('price_range',axis='columns').values

    ############### Declare the estimatiors

    X_train,X_valid, y_train,y_valid = model_selection.train_test_split(X,y,test_size=0.2)
    # for reproducibility
    sampler = TPESampler(seed=10) 
    # Optimize the model hyperparameter
    study = optuna.create_study(direction='minimize', sampler=sampler)
    # Change the number of trial as per need 
    study.optimize(objective, n_trials=10,n_jobs=1,show_progress_bar=True)



    best_param = study.best_params
    #Calling Cross Validation Funcrion
    reg = lgbm.LGBMRegressor(**best_param)
    cross_val(reg,5,X,y,'regression')


Progress bar is experimental (supported from v1.2.0). The interface can change in the future.



HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

[I 2020-07-24 13:44:47,360] Finished trial#0 with value: 1.4411666240408343 with parameters: {'lambda_l1': 0.08747537025773001, 'lambda_l2': 1.537331564587801e-08, 'num_leaves': 322, 'n_estimators': 1880, 'feature_fraction': 0.6991042073815543, 'bagging_fraction': 0.5348779873185086, 'bagging_freq': 2, 'min_child_samples': 13}. Best is trial#0 with value: 1.4411666240408343.
[I 2020-07-24 13:44:48,923] Finished trial#1 with value: 1.5381736127514682 with parameters: {'lambda_l1': 2.4552467279949516e-06, 'lambda_l2': 2.3573583942260753e-06, 'num_leaves': 504, 'n_estimators': 2252, 'feature_fraction': 0.8899611011324, 'bagging_fraction': 0.6016429506787311, 'bagging_freq': 1, 'min_child_samples': 67}. Best is trial#0 with value: 1.4411666240408343.
[I 2020-07-24 13:44:50,083] Finished trial#2 with value: 1.6898381335155646 with parameters: {'lambda_l1': 1.8860355948710983e-08, 'lambda_l2': 0.7916522657454635, 'num_leaves': 179, 'n_estimators': 777, 'feature_fraction': 0.47887489144151374

5it [00:08,  1.77s/it]


R2 Score: 0.9247851374958043
Mean Error :0.30623879791703534
