### Import libraries and dataset

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK

import math
import lightgbm as lgb

In [2]:
# Import raw data 
df = pd.read_csv('../csv/true_car_listings.csv')

### Data and Variable preparations 

In [3]:
### Exclusions

# Extremely high prices
df.drop(df[df['Price'] > 100000].index, inplace = True)

In [4]:
continous_vars = ['Year', 'Mileage']
categorical_vars = ['Make', 'Model']
target = 'Price'

In [5]:
for var in categorical_vars:
    le = LabelEncoder()
    df[var] = le.fit_transform(df[var])

### Try hyperopt 

In [8]:
data = df.copy()

In [9]:
dtrain = lgb.Dataset(data[continous_vars + categorical_vars],
                     data[target],             
                     categorical_feature = categorical_vars,
                     free_raw_data=False)

In [10]:
def train(params):
    
        params['application'] = 'regression'
        params['num_iterations'] = 1000
        params['early_stopping_round'] = 10
        params['feature_fraction'] = 1
        
        cv_results = lgb.cv(params, dtrain, nfold=5, 
                              seed = 72, categorical_feature=categorical_vars, stratified=False,
                              verbose_eval =None, metrics = 'mae')

        return np.min(cv_results['l1-mean'])

In [11]:
space = {
    'learning_rate': 
hp.uniform('learning_rate', 0.01, 1),
         'max_depth': 
hp.choice('max_depth', range(1, 30, 1)),
         'num_leaves': 
hp.choice('num_leaves', range(2, 100, 1)),
         'bagging_fraction': 
hp.uniform('bagging_fraction', 0.5, 1),         
         'min_split_gain': 
hp.uniform('min_split_gain', 0.001, 0.1),        
        'min_child_weight': 
hp.choice('min_child_weight', range(10, 25, 1))            }

In [12]:
trials = Trials()
best = fmin(fn = train,
           space = space,
            trials = trials,
           algo = tpe.suggest,
           max_evals = 15)

  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]





100%|██████████| 50/50 [1:10:45<00:00, 84.91s/trial, best loss: 2182.192477633955] 


In [14]:
best

{'bagging_fraction': 0.6296862669434514,
 'learning_rate': 0.04413213389097873,
 'max_depth': 28,
 'min_child_weight': 13,
 'min_split_gain': 0.06531986293934416,
 'num_leaves': 88}

In [15]:
trials.results

[{'loss': 2188.4124530237705, 'status': 'ok'},
 {'loss': 2283.851485809139, 'status': 'ok'},
 {'loss': 2214.881336653373, 'status': 'ok'},
 {'loss': 2245.0877111268874, 'status': 'ok'},
 {'loss': 2262.671620626672, 'status': 'ok'},
 {'loss': 2188.083757485994, 'status': 'ok'},
 {'loss': 2256.948662659091, 'status': 'ok'},
 {'loss': 2267.664299423187, 'status': 'ok'},
 {'loss': 2863.9417728081107, 'status': 'ok'},
 {'loss': 2233.5219805529614, 'status': 'ok'},
 {'loss': 2239.8329988657533, 'status': 'ok'},
 {'loss': 2190.884225384373, 'status': 'ok'},
 {'loss': 2211.6459945926676, 'status': 'ok'},
 {'loss': 2199.724613356966, 'status': 'ok'},
 {'loss': 2216.9031807659617, 'status': 'ok'},
 {'loss': 2207.007942795374, 'status': 'ok'},
 {'loss': 2217.2109663124907, 'status': 'ok'},
 {'loss': 2225.0155799823183, 'status': 'ok'},
 {'loss': 2464.701287128665, 'status': 'ok'},
 {'loss': 2229.9460163233693, 'status': 'ok'},
 {'loss': 2200.2526670947473, 'status': 'ok'},
 {'loss': 2215.68235387