### Import packages

In [11]:
import time
import itertools
import numpy as np
import pandas as pd
import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

from utilities import cal_score, convert_types

### Columns

In [2]:
cols_num = ['txn_dt', 'total_floor', 'building_complete_dt', 'parking_area', 'parking_price', 'txn_floor', 'land_area', 'building_area', 'lat', 'lon', 'village_income_median', 'town_population', 'town_area', 'town_population_density', 'doc_rate', 'master_rate', 'bachelor_rate', 'jobschool_rate', 'highschool_rate', 'junior_rate', 'elementary_rate', 'born_rate', 'death_rate', 'marriage_rate', 'divorce_rate', 'N_50', 'N_500', 'N_1000', 'N_5000', 'N_10000', 'I_10', 'I_50', 'I_100', 'I_250', 'I_500', 'I_1000', 'I_5000', 'I_10000', 'I_MIN', 'II_10', 'II_50', 'II_100', 'II_250', 'II_500', 'II_1000', 'II_5000', 'II_10000', 'II_MIN', 'III_10', 'III_50', 'III_100', 'III_250', 'III_500', 'III_1000', 'III_5000', 'III_10000', 'III_MIN', 'IV_10', 'IV_50', 'IV_100', 'IV_250', 'IV_500', 'IV_1000', 'IV_5000', 'IV_10000', 'IV_MIN', 'V_10', 'V_50', 'V_100', 'V_250', 'V_500', 'V_1000', 'V_5000', 'V_10000', 'V_MIN', 'VI_50', 'VI_100', 'VI_250', 'VI_500', 'VI_1000', 'VI_5000', 'VI_10000', 'VI_MIN', 'VII_10', 'VII_50', 'VII_100', 'VII_250', 'VII_500', 'VII_1000', 'VII_5000', 'VII_10000', 'VII_MIN', 'VIII_10', 'VIII_50', 'VIII_100', 'VIII_250', 'VIII_500', 'VIII_1000', 'VIII_5000', 'VIII_10000', 'VIII_MIN', 'IX_10', 'IX_50', 'IX_100', 'IX_250', 'IX_500', 'IX_1000', 'IX_5000', 'IX_10000', 'IX_MIN', 'X_10', 'X_50', 'X_100', 'X_250', 'X_500', 'X_1000', 'X_5000', 'X_10000', 'X_MIN', 'XI_10', 'XI_50', 'XI_100', 'XI_250', 'XI_500', 'XI_1000', 'XI_5000', 'XI_10000', 'XI_MIN', 'XII_10', 'XII_50', 'XII_100', 'XII_250', 'XII_500', 'XII_1000', 'XII_5000', 'XII_10000', 'XII_MIN', 'XIII_10', 'XIII_50', 'XIII_100', 'XIII_250', 'XIII_500', 'XIII_1000', 'XIII_5000', 'XIII_10000', 'XIII_MIN', 'XIV_10', 'XIV_50', 'XIV_100', 'XIV_250', 'XIV_500', 'XIV_1000', 'XIV_5000', 'XIV_10000', 'XIV_MIN']
cols_cat = ['building_material', 'city', 'building_type', 'building_use', 'parking_way', 'town', 'village']
cols_bin = ['I_index_50', 'I_index_500', 'I_index_1000', 'II_index_50', 'II_index_500', 'II_index_1000', 'III_index_50', 'III_index_500', 'III_index_1000', 'IV_index_50', 'IV_index_500', 'IV_index_1000', 'IV_index_5000', 'V_index_50', 'V_index_500', 'V_index_1000', 'VI_10', 'VI_index_50', 'VI_index_500', 'VI_index_1000', 'VII_index_50', 'VII_index_500', 'VII_index_1000', 'VIII_index_50', 'VIII_index_500', 'VIII_index_1000', 'IX_index_50', 'IX_index_500', 'IX_index_1000', 'IX_index_5000', 'X_index_50', 'X_index_500', 'X_index_1000', 'XI_index_50', 'XI_index_500', 'XI_index_1000', 'XI_index_5000', 'XI_index_10000', 'XII_index_50', 'XII_index_500', 'XII_index_1000', 'XIII_index_50', 'XIII_index_500', 'XIII_index_1000', 'XIII_index_5000', 'XIII_index_10000', 'XIV_index_50', 'XIV_index_500', 'XIV_index_1000']
cols_feats = cols_num + cols_cat + cols_bin

col_target = 'total_price'
col_target_log1p = 'target'

### Read data

In [3]:
df = pd.read_csv('dataset/train.csv', dtype=object)

In [4]:
df.columns

Index(['building_id', 'building_material', 'city', 'txn_dt', 'total_floor',
       'building_type', 'building_use', 'building_complete_dt', 'parking_way',
       'parking_area',
       ...
       'XIV_500', 'XIV_index_500', 'XIV_1000', 'XIV_index_1000', 'XIV_5000',
       'XIV_index_5000', 'XIV_10000', 'XIV_index_10000', 'XIV_MIN',
       'total_price'],
      dtype='object', length=235)

### Preprocessing

In [5]:
# Convert types
df = convert_types(df, cols_num, col_target=col_target)

# Generate feats (train-test-same feats)
#create_feats(df)

### Feat engineering

In [6]:
from feature_engineering import CategoricalColumnsEncoder

In [7]:
class FeatureEngineering():
    def __init__(self):
        pass
    
    def fit_transform(self, df, cols_cat, cols_bin):
        df = df.copy()
        # Denote categorical-type
        self.cat_encoder = CategoricalColumnsEncoder(mode='pandas')
        self.cat_encoder.fit_transform(df, cols_cat+cols_bin)
        return df
    
    def transform(self, df):
        df = df.copy()
        self.cat_encoder.transform(df)
        return df

### Grid search

In [8]:
# pars
is_log1p = True # if train on log1p target
# pars

# grid search
params_fix = {'task': 'train',
              'boosting_type': 'gbdt',
              'objective': 'mse',
              'metric': 'mape',
              'learning_rate': 0.100,
              }

#### Round 1

In [9]:
t0 = time.time()

params_gsearch1 = {'num_leaves': [63, 255, 511],           # may reduce in dim-reduction exp
                   'feature_fraction': [0.5, 0.75, 1.0],
                   'min_data_in_leaf': [5, 20, 50]
                   }

gsearch = {}
folds = KFold(n_splits=3, shuffle=True, random_state=123)
for i_fold, (itrain, ival) in enumerate(folds.split(df)): # kfold
    print('==== Fold', i_fold+1, '====')
    
    # split train, val
    df_train = df.loc[itrain]
    df_val = df.loc[ival]
    
    # feat eng
    feat_eng = FeatureEngineering()
    df_train = feat_eng.fit_transform(df_train, cols_cat, cols_bin)
    df_val = feat_eng.transform(df_val)

    # Construct lgb dataset
    if is_log1p:
        lgb_train = lgb.Dataset(df_train[cols_feats], df_train['target']).construct()
        lgb_val = lgb.Dataset(df_val[cols_feats], df_val['target'], reference=lgb_train).construct()
    else:
        lgb_train = lgb.Dataset(df_train[cols_feats], df_train['total_price']).construct()
        lgb_val = lgb.Dataset(df_val[cols_feats], df_val['total_price'], reference=lgb_train).construct()
        
    # grid search
    for values in itertools.product(*[params_gsearch1[key] for key in params_gsearch1]):
        params = params_fix.copy()
        params.update( dict(zip(params_gsearch1.keys(), values)) )
        print('params:', params)

        model = lgb.train(params, lgb_train,
                          num_boost_round=10000,
                          valid_sets=lgb_val,
                          verbose_eval=2000,
                          early_stopping_rounds=200)
        y_pred = model.predict(df_val[cols_feats])
        
        if is_log1p:
            y_pred_expm1 = np.expm1(y_pred)
            y_pred_final = np.clip(y_pred_expm1, 0, None)
        else:
            y_pred_final = y_pred
            
        score = cal_score(df_val['total_price'], y_pred_final)
        tuple_params = tuple(params.items())
        gsearch[tuple_params] = gsearch.get(tuple_params, []) + [score]

# aggregate, sort gsearch results
gsearch_results1 = [[key, np.mean(val), val] for key, val in gsearch.items()]
gsearch_results1.sort(key= lambda x: x[1], reverse=True)
display(gsearch_results1)

print(f'total computing time: {time.time()-t0}')

==== Fold 1 ====
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 63, 'feature_fraction': 0.5, 'min_data_in_leaf': 5}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00905288
[4000]	valid_0's mape: 0.00898725
[6000]	valid_0's mape: 0.00897212
[8000]	valid_0's mape: 0.0089668
[10000]	valid_0's mape: 0.00896467
Did not meet early stopping. Best iteration is:
[9999]	valid_0's mape: 0.00896466
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 63, 'feature_fraction': 0.5, 'min_data_in_leaf': 20}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00901935
[4000]	valid_0's mape: 0.0089757
Early stopping, best iteration is:
[4584]	valid_0's mape: 0.00897188
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'nu

[2000]	valid_0's mape: 0.00936392
[4000]	valid_0's mape: 0.00936313
[6000]	valid_0's mape: 0.00936306
Early stopping, best iteration is:
[6724]	valid_0's mape: 0.00936306
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 511, 'feature_fraction': 0.5, 'min_data_in_leaf': 50}
Training until validation scores don't improve for 200 rounds.
Early stopping, best iteration is:
[725]	valid_0's mape: 0.00925356
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 511, 'feature_fraction': 0.75, 'min_data_in_leaf': 5}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00914756
[4000]	valid_0's mape: 0.00914749
Early stopping, best iteration is:
[5733]	valid_0's mape: 0.00914749
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 511, 'feature_f

[2000]	valid_0's mape: 0.00910511
[4000]	valid_0's mape: 0.00910241
[6000]	valid_0's mape: 0.00910205
[8000]	valid_0's mape: 0.00910196
[10000]	valid_0's mape: 0.00910194
Did not meet early stopping. Best iteration is:
[9991]	valid_0's mape: 0.00910194
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 50}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00914097
Early stopping, best iteration is:
[2318]	valid_0's mape: 0.00913997
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 1.0, 'min_data_in_leaf': 5}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00912981
[4000]	valid_0's mape: 0.00912826
[6000]	valid_0's mape: 0.00912821
[8000]	valid_0's mape: 0.00912821
Early stopping, best ite

[2000]	valid_0's mape: 0.00930909
[4000]	valid_0's mape: 0.00925122
[6000]	valid_0's mape: 0.00924072
[8000]	valid_0's mape: 0.0092377
[10000]	valid_0's mape: 0.00923682
Did not meet early stopping. Best iteration is:
[9962]	valid_0's mape: 0.00923682
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 63, 'feature_fraction': 1.0, 'min_data_in_leaf': 20}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00933989
[4000]	valid_0's mape: 0.00929344
[6000]	valid_0's mape: 0.00927978
[8000]	valid_0's mape: 0.00927548
[10000]	valid_0's mape: 0.00927352
Did not meet early stopping. Best iteration is:
[9998]	valid_0's mape: 0.00927351
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 63, 'feature_fraction': 1.0, 'min_data_in_leaf': 50}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_

Early stopping, best iteration is:
[791]	valid_0's mape: 0.00939457


[[(('task', 'train'),
   ('boosting_type', 'gbdt'),
   ('objective', 'mse'),
   ('metric', 'mape'),
   ('learning_rate', 0.1),
   ('num_leaves', 255),
   ('feature_fraction', 0.75),
   ('min_data_in_leaf', 5)),
  5268.189078851624,
  [5276.856084201995, 5262.857674414753, 5264.8534779381225]],
 [(('task', 'train'),
   ('boosting_type', 'gbdt'),
   ('objective', 'mse'),
   ('metric', 'mape'),
   ('learning_rate', 0.1),
   ('num_leaves', 255),
   ('feature_fraction', 0.75),
   ('min_data_in_leaf', 20)),
  5261.85503558492,
  [5270.856434062865, 5236.85650497921, 5277.852167712686]],
 [(('task', 'train'),
   ('boosting_type', 'gbdt'),
   ('objective', 'mse'),
   ('metric', 'mape'),
   ('learning_rate', 0.1),
   ('num_leaves', 255),
   ('feature_fraction', 1.0),
   ('min_data_in_leaf', 5)),
  5247.186439145,
  [5250.852736361821, 5268.856133436929, 5221.85044763625]],
 [(('task', 'train'),
   ('boosting_type', 'gbdt'),
   ('objective', 'mse'),
   ('metric', 'mape'),
   ('learning_rate', 0.

AttributeError: 'float' object has no attribute 'time'

#### Round 2

In [12]:
t0 = time.time()

params_gsearch2 = {'lambda_l1': [0, 0.01, 0.1],
                   'lambda_l2': [0, 0.01, 0.1]
                  }

gsearch = {}
folds = KFold(n_splits=3, shuffle=True, random_state=246)
for i_fold, (itrain, ival) in enumerate(folds.split(df)): # kfold
    print('==== Fold', i_fold+1, '====')
    
    # split train, val
    df_train = df.loc[itrain]
    df_val = df.loc[ival]
    
    # feat eng
    feat_eng = FeatureEngineering()
    df_train = feat_eng.fit_transform(df_train, cols_cat, cols_bin)
    df_val = feat_eng.transform(df_val)

    # Construct lgb dataset
    if is_log1p:
        lgb_train = lgb.Dataset(df_train[cols_feats], df_train['target']).construct()
        lgb_val = lgb.Dataset(df_val[cols_feats], df_val['target'], reference=lgb_train).construct()
    else:
        lgb_train = lgb.Dataset(df_train[cols_feats], df_train['total_price']).construct()
        lgb_val = lgb.Dataset(df_val[cols_feats], df_val['total_price'], reference=lgb_train).construct()
        
    # grid search
    # pick top 5 params from round 1
    for result1 in gsearch_results1[:5]:
        params1 = dict(result1[0])
        for values in itertools.product(*[params_gsearch2[key] for key in params_gsearch2]):
            params = params1.copy()
            params.update( dict(zip(params_gsearch2.keys(), values)) )
            print('params:', params)

            model = lgb.train(params, lgb_train,
                              num_boost_round=10000,
                              valid_sets=lgb_val,
                              verbose_eval=2000,
                              early_stopping_rounds=200)
            y_pred = model.predict(df_val[cols_feats])

            if is_log1p:
                y_pred_expm1 = np.expm1(y_pred)
                y_pred_final = np.clip(y_pred_expm1, 0, None)
            else:
                y_pred_final = y_pred

            score = cal_score(df_val['total_price'], y_pred_final)
            tuple_params = tuple(params.items())
            gsearch[tuple_params] = gsearch.get(tuple_params, []) + [score]

# aggregate, sort gsearch results
gsearch_results2 = [[key, np.mean(val), val] for key, val in gsearch.items()]
gsearch_results2.sort(key= lambda x: x[1], reverse=True)
display(gsearch_results2)

print(f'total computing time: {time.time()-t0}')

==== Fold 1 ====
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 5, 'lambda_l1': 0, 'lambda_l2': 0}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00908757
[4000]	valid_0's mape: 0.00908563
[6000]	valid_0's mape: 0.0090855
[8000]	valid_0's mape: 0.00908549
[10000]	valid_0's mape: 0.00908549
Did not meet early stopping. Best iteration is:
[9999]	valid_0's mape: 0.00908549
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 5, 'lambda_l1': 0, 'lambda_l2': 0.01}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00907749
[4000]	valid_0's mape: 0.00907566
[6000]	valid_0's mape: 0.00907557
[8000]	valid_0's mape: 0.00907556
Early stopping, best iteration is:
[9157]	va

Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00928211
[4000]	valid_0's mape: 0.00928072
[6000]	valid_0's mape: 0.00928066
[8000]	valid_0's mape: 0.00928066
[10000]	valid_0's mape: 0.00928066
Did not meet early stopping. Best iteration is:
[9998]	valid_0's mape: 0.00928066
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 1.0, 'min_data_in_leaf': 5, 'lambda_l1': 0, 'lambda_l2': 0.1}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00925569
[4000]	valid_0's mape: 0.00925431
[6000]	valid_0's mape: 0.00925426
Early stopping, best iteration is:
[7313]	valid_0's mape: 0.00925426
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 1.0, 'min_data_in_leaf': 5, 'lambda_l1': 0.01, 'lambda_l2': 0}
Training until validation

[2000]	valid_0's mape: 0.00916119
[4000]	valid_0's mape: 0.00910725
[6000]	valid_0's mape: 0.00909447
[8000]	valid_0's mape: 0.00908998
[10000]	valid_0's mape: 0.00908859
Did not meet early stopping. Best iteration is:
[9996]	valid_0's mape: 0.00908858
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 63, 'feature_fraction': 0.75, 'min_data_in_leaf': 5, 'lambda_l1': 0.01, 'lambda_l2': 0.01}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00917736
[4000]	valid_0's mape: 0.00911847
[6000]	valid_0's mape: 0.00910616
[8000]	valid_0's mape: 0.00910184
[10000]	valid_0's mape: 0.00910029
Did not meet early stopping. Best iteration is:
[9967]	valid_0's mape: 0.00910029
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 63, 'feature_fraction': 0.75, 'min_data_in_leaf': 5, 'lambda_l1': 0.01, 'lambda_l2': 0.1}


[2000]	valid_0's mape: 0.00901549
Early stopping, best iteration is:
[3446]	valid_0's mape: 0.00901433
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 20, 'lambda_l1': 0.01, 'lambda_l2': 0.01}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00908304
Early stopping, best iteration is:
[3439]	valid_0's mape: 0.00908162
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 20, 'lambda_l1': 0.01, 'lambda_l2': 0.1}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.0090582
Early stopping, best iteration is:
[3403]	valid_0's mape: 0.00905628
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, '

[2000]	valid_0's mape: 0.00913923
Early stopping, best iteration is:
[2587]	valid_0's mape: 0.00913799
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 50, 'lambda_l1': 0.1, 'lambda_l2': 0.01}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00913377
Early stopping, best iteration is:
[2434]	valid_0's mape: 0.00913303
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 50, 'lambda_l1': 0.1, 'lambda_l2': 0.1}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00904864
Early stopping, best iteration is:
[2715]	valid_0's mape: 0.00904739
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 63, 'fe

Early stopping, best iteration is:
[1893]	valid_0's mape: 0.0090005
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 5, 'lambda_l1': 0.1, 'lambda_l2': 0.01}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00902462
Early stopping, best iteration is:
[1895]	valid_0's mape: 0.00902462
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 5, 'lambda_l1': 0.1, 'lambda_l2': 0.1}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00903591
Early stopping, best iteration is:
[1929]	valid_0's mape: 0.0090359
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_l

[2000]	valid_0's mape: 0.009088
Early stopping, best iteration is:
[1823]	valid_0's mape: 0.009088
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 50, 'lambda_l1': 0, 'lambda_l2': 0}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00902423
[4000]	valid_0's mape: 0.00902194
Early stopping, best iteration is:
[5595]	valid_0's mape: 0.0090215
params: {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'mse', 'metric': 'mape', 'learning_rate': 0.1, 'num_leaves': 255, 'feature_fraction': 0.75, 'min_data_in_leaf': 50, 'lambda_l1': 0, 'lambda_l2': 0.01}
Training until validation scores don't improve for 200 rounds.
[2000]	valid_0's mape: 0.00909468
[4000]	valid_0's mape: 0.00909181
[6000]	valid_0's mape: 0.00909122
Early stopping, best iteration is:
[5958]	valid_0's mape: 0.00909121
params: {'task': 'train', 'boosting_type'

[6000]	valid_0's mape: 0.00899802
Early stopping, best iteration is:
[7059]	valid_0's mape: 0.0089959


[[(('task', 'train'),
   ('boosting_type', 'gbdt'),
   ('objective', 'mse'),
   ('metric', 'mape'),
   ('learning_rate', 0.1),
   ('num_leaves', 255),
   ('feature_fraction', 0.75),
   ('min_data_in_leaf', 5),
   ('lambda_l1', 0.01),
   ('lambda_l2', 0.01)),
  5280.5223792655715,
  [5272.855039949397, 5284.855721199942, 5283.856376647377]],
 [(('task', 'train'),
   ('boosting_type', 'gbdt'),
   ('objective', 'mse'),
   ('metric', 'mape'),
   ('learning_rate', 0.1),
   ('num_leaves', 255),
   ('feature_fraction', 0.75),
   ('min_data_in_leaf', 5),
   ('lambda_l1', 0.1),
   ('lambda_l2', 0)),
  5278.189105502227,
  [5296.855836352749, 5292.855008633959, 5244.856471519973]],
 [(('task', 'train'),
   ('boosting_type', 'gbdt'),
   ('objective', 'mse'),
   ('metric', 'mape'),
   ('learning_rate', 0.1),
   ('num_leaves', 63),
   ('feature_fraction', 0.75),
   ('min_data_in_leaf', 5),
   ('lambda_l1', 0.01),
   ('lambda_l2', 0.01)),
  5277.521540321143,
  [5278.853372049127, 5280.855076611605,

total computing time: 52606.37984395027


### Conclusion

Best params:


In [13]:
a= pd.DataFrame({'a':[1,2,3,4]})