### Import libraries and dataset

In [1]:
import pandas as pd
import lightgbm as lgb
import optuna

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [2]:
# Import raw data 
def import_raw_data():
    raw = pd.read_csv('../csv/true_car_listings.csv')
    df = raw[['Price','Mileage','Year','Make']].copy().dropna()
    df['Make'] = df['Make'].rank() 
    return df 

In [3]:
def train_iterr(df,params):
    categorical = ['Make']
    continuous = ['Mileage','Year']
    target = 'Price'
    train = df.sample(frac=0.8).copy() 
    test = df.loc[~df.index.isin(train.index.values)].copy()        
    lgtrain = lgb.Dataset(train[categorical+continuous], categorical_feature=categorical,label=train[target])
    lgval = lgb.Dataset(test[categorical+continuous], categorical_feature=categorical,label=test[target])
    model = lgb.train(params, lgtrain, valid_sets=lgval, early_stopping_rounds=100, verbose_eval=20)
    return model.best_score['valid_0']['rmse']

In [4]:
def objective(trial):
    
    params = {
        "objective": "regression",
        "metric": "rmse",
        "num_leaves": 30,
        "learning_rate": 0.1
    }
    
    leaves = trial.suggest_uniform('num_leaves', 10,1000)
    params['num_leaves'] = int(leaves)
    #lr = trial.suggest_uniform('num_leaves', 0.01,0.1)
    #params['learning_rate'] = lr   
    
    
    result = train_iterr(df,params)
    return result

df = import_raw_data()
study = optuna.create_study()
study.optimize(objective, n_trials=5)

study.best_params  

[32m[I 2021-07-08 11:37:01,971][0m A new study created in memory with name: no-name-357d56e8-02c6-4668-a8f8-69c15c407535[0m


Training until validation scores don't improve for 100 rounds
[20]	valid_0's rmse: 8526.87
[40]	valid_0's rmse: 8427.43
[60]	valid_0's rmse: 8434.78
[80]	valid_0's rmse: 8439.11
[100]	valid_0's rmse: 8440.04
Did not meet early stopping. Best iteration is:
[43]	valid_0's rmse: 8425.85


[32m[I 2021-07-08 11:37:06,269][0m Trial 0 finished with value: 8425.845973214462 and parameters: {'num_leaves': 148.27315500230455}. Best is trial 0 with value: 8425.845973214462.[0m


Training until validation scores don't improve for 100 rounds
[20]	valid_0's rmse: 8752.9
[40]	valid_0's rmse: 8610.71
[60]	valid_0's rmse: 8600.59
[80]	valid_0's rmse: 8605.59
[100]	valid_0's rmse: 8606.9
Did not meet early stopping. Best iteration is:
[58]	valid_0's rmse: 8599.38


[32m[I 2021-07-08 11:37:14,299][0m Trial 1 finished with value: 8599.382478586502 and parameters: {'num_leaves': 385.81850611015483}. Best is trial 0 with value: 8425.845973214462.[0m


Training until validation scores don't improve for 100 rounds
[20]	valid_0's rmse: 8435.09
[40]	valid_0's rmse: 8344.04
[60]	valid_0's rmse: 8350.94
[80]	valid_0's rmse: 8356.24
[100]	valid_0's rmse: 8360.56
Did not meet early stopping. Best iteration is:
[36]	valid_0's rmse: 8342.96


[32m[I 2021-07-08 11:37:24,017][0m Trial 2 finished with value: 8342.957769426792 and parameters: {'num_leaves': 614.5357178182937}. Best is trial 2 with value: 8342.957769426792.[0m


Training until validation scores don't improve for 100 rounds
[20]	valid_0's rmse: 8574.83
[40]	valid_0's rmse: 8460.92
[60]	valid_0's rmse: 8459.71
[80]	valid_0's rmse: 8467.77
[100]	valid_0's rmse: 8473.45
Did not meet early stopping. Best iteration is:
[59]	valid_0's rmse: 8458


[32m[I 2021-07-08 11:37:32,916][0m Trial 3 finished with value: 8458.000016366057 and parameters: {'num_leaves': 357.5354872157008}. Best is trial 2 with value: 8342.957769426792.[0m


Training until validation scores don't improve for 100 rounds
[20]	valid_0's rmse: 8814.25
[40]	valid_0's rmse: 8680.8
[60]	valid_0's rmse: 8670.88
[80]	valid_0's rmse: 8675.28
[100]	valid_0's rmse: 8679.13
Did not meet early stopping. Best iteration is:
[59]	valid_0's rmse: 8670.39


[32m[I 2021-07-08 11:37:45,549][0m Trial 4 finished with value: 8670.386566051413 and parameters: {'num_leaves': 731.0716000718473}. Best is trial 2 with value: 8342.957769426792.[0m


{'num_leaves': 614.5357178182937}