### Import libraries and dataset

In [23]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from matplotlib import pyplot as plt
import math
import lightgbm as lgb

In [3]:
# Import raw data 
df = pd.read_csv('../csv/true_car_listings.csv')

### Data and Variable preparations 

In [4]:
### Exclusions

# Extremely high prices
df.drop(df[df['Price'] > 100000].index, inplace = True)

In [9]:
continous_vars = ['Year', 'Mileage']
categorical_vars = ['Make', 'Model']
target = 'Price'

In [24]:
for var in categorical_vars:
    le = LabelEncoder()
    df[var] = le.fit_transform(df[var])

### Try hyperopt 

In [155]:
dtrain = lgb.Dataset(data[continous_vars + categorical_vars],
                     data[target],             
                     categorical_feature = categorical_vars,
                     free_raw_data=False)

In [177]:
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
import matplotlib.pyplot as plt

In [197]:
def train(params):
    
        params['application'] = 'regression'
        params['num_iterations'] = 1000
        params['early_stopping_round'] = 10
        params['feature_fraction'] = 1
        
        cv_results = lgb.cv(params, dtrain, nfold=5, 
                              seed = 72, categorical_feature=categorical_vars, stratified=False,
                              verbose_eval =None, metrics = 'mae')

        return np.min(cv_results['l1-mean'])

In [198]:
space = {
    'learning_rate': 
hp.uniform('learning_rate', 0.01, 1),
         'max_depth': 
hp.choice('max_depth', range(1, 30, 1)),
         'num_leaves': 
hp.choice('num_leaves', range(2, 100, 1)),
         'bagging_fraction': 
hp.uniform('bagging_fraction', 0.5, 1),         
         'min_split_gain': 
hp.uniform('min_split_gain', 0.001, 0.1),        
        'min_child_weight': 
hp.choice('min_child_weight', range(10, 25, 1))            }

In [199]:
trials = Trials()
best = fmin(fn = train,
           space = space,
            trials = trials,
           algo = tpe.suggest,
           max_evals = 50)

100%|██████████| 50/50 [27:11<00:00, 32.64s/trial, best loss: 2183.069901822856] 


In [200]:
best

{'bagging_fraction': 0.6789978290704509,
 'learning_rate': 0.07505885043928769,
 'max_depth': 13,
 'min_child_weight': 5,
 'min_split_gain': 0.04060760311169449,
 'num_leaves': 93}

In [202]:
trials.results

[{'loss': 2292.378646421143, 'status': 'ok'},
 {'loss': 2198.834321849252, 'status': 'ok'},
 {'loss': 2242.738408325614, 'status': 'ok'},
 {'loss': 2275.565718223557, 'status': 'ok'},
 {'loss': 2252.732916351258, 'status': 'ok'},
 {'loss': 2236.0558931958353, 'status': 'ok'},
 {'loss': 2231.87954027227, 'status': 'ok'},
 {'loss': 2199.7675789650816, 'status': 'ok'},
 {'loss': 2233.991829246768, 'status': 'ok'},
 {'loss': 2205.794444615657, 'status': 'ok'},
 {'loss': 2279.70476067877, 'status': 'ok'},
 {'loss': 2280.5790462105774, 'status': 'ok'},
 {'loss': 2283.657725995358, 'status': 'ok'},
 {'loss': 2209.68445492868, 'status': 'ok'},
 {'loss': 2207.827156873448, 'status': 'ok'},
 {'loss': 2230.586059066283, 'status': 'ok'},
 {'loss': 2253.210825813657, 'status': 'ok'},
 {'loss': 2230.7581239732476, 'status': 'ok'},
 {'loss': 2208.32543186136, 'status': 'ok'},
 {'loss': 2212.7146360010274, 'status': 'ok'},
 {'loss': 2186.5575430849913, 'status': 'ok'},
 {'loss': 2235.064608052009, 'st