### Import libraries and dataset

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK

import math
import lightgbm as lgb

In [None]:
# Load the raw car-listings CSV (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer='../csv/true_car_listings.csv')

### Data and variable preparation

In [None]:
### Exclusions

# Remove, in place, every listing whose price is above 100,000.
# Rows are selected by mask first and then dropped by their index labels.
df.drop(index=df.loc[df['Price'] > 100000].index, inplace=True)

In [None]:
# Column the model is trained to predict.
target = "Price"

# Columns that are label-encoded and declared categorical to LightGBM.
categorical_vars = [
    "Make",
    "Model",
]

# Numeric columns used as-is.
# NOTE: the `continous_vars` spelling is kept because later cells use it.
continous_vars = [
    "Year",
    "Mileage",
]

In [None]:
# Replace each categorical column with integer codes.
# A fresh encoder is fitted per column; only the last one stays bound to `le`.
for var in categorical_vars:
    le = LabelEncoder()
    le.fit(df[var])
    df[var] = le.transform(df[var])

### Hyperparameter search with hyperopt

In [None]:
# Take an independent (deep) copy of the prepared frame for the search below
data = df.copy(deep=True)

In [None]:
# LightGBM training set built from all feature columns and the target column.
# free_raw_data=False keeps the raw data attached to the Dataset object.
feature_columns = continous_vars + categorical_vars
dtrain = lgb.Dataset(
    data=data[feature_columns],
    label=data[target],
    categorical_feature=categorical_vars,
    free_raw_data=False,
)

In [None]:
def train(params):
    """Hyperopt objective: cross-validated MAE of a LightGBM regressor.

    Runs 5-fold, non-stratified cross-validation on the module-level
    ``dtrain`` dataset with the sampled hyperparameters plus a fixed set of
    training options (regression objective, up to 1000 iterations, early
    stopping after 10 rounds, all features used).

    Parameters
    ----------
    params : dict
        Hyperparameters sampled from the search space. The dict is copied,
        so the caller's object is left unmodified.

    Returns
    -------
    float
        The lowest mean validation L1 (MAE) across boosting rounds.

    Raises
    ------
    KeyError
        If the cross-validation result holds no mean-L1 entry.
    """
    # Work on a copy so the sampled configuration is not mutated in place.
    cv_params = dict(params)
    cv_params.update({
        'application': 'regression',
        'num_iterations': 1000,
        'early_stopping_round': 10,
        'feature_fraction': 1,
    })
    # LightGBM only applies bagging_fraction when bagging_freq is non-zero;
    # without this the tuned bagging_fraction would have no effect.
    cv_params.setdefault('bagging_freq', 1)

    # verbose_eval is intentionally not passed: None was its default, and
    # newer LightGBM releases no longer accept the argument.
    cv_results = lgb.cv(
        cv_params,
        dtrain,
        nfold=5,
        seed=72,
        categorical_feature=categorical_vars,
        stratified=False,
        metrics='mae',
    )

    # The result key is 'valid l1-mean' on LightGBM >= 4.0 and 'l1-mean'
    # on earlier releases.
    for mean_key in ('valid l1-mean', 'l1-mean'):
        if mean_key in cv_results:
            return np.min(cv_results[mean_key])
    raise KeyError(
        "no mean L1 metric in lgb.cv results: %s" % sorted(cv_results)
    )

In [None]:
# Hyperopt search space. Every entry's label equals its dict key, which is
# also the parameter name handed to the objective function.
space = dict(
    learning_rate=hp.uniform('learning_rate', 0.01, 1),
    max_depth=hp.choice('max_depth', range(1, 30)),
    num_leaves=hp.choice('num_leaves', range(2, 100)),
    bagging_fraction=hp.uniform('bagging_fraction', 0.5, 1),
    min_split_gain=hp.uniform('min_split_gain', 0.001, 0.1),
    min_child_weight=hp.choice('min_child_weight', range(10, 25)),
)

In [None]:
# Run 15 TPE evaluations of `train` over `space`; every evaluation is
# recorded in `trials`, and the best sampled point is returned as `best`.
trials = Trials()
best = fmin(
    train,
    space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)

In [None]:
# Display the best point returned by fmin.
# NOTE(review): for the hp.choice parameters hyperopt appears to report the
# index of the chosen option rather than its value — confirm before reuse.
best

In [None]:
# `best2` was never defined in this notebook, so the cell raised NameError.
# Show the best trial decoded into actual hyperparameter values instead:
# space_eval maps the point returned by fmin back through the search space.
from hyperopt import space_eval

space_eval(space, best)

In [None]:
# Display the results stored on the Trials object that was passed to fmin
trials.results