### IMPORT PACKAGES

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import lightgbm as lgb
import catboost as cgb
import xgboost as xgb
from bayes_opt import BayesianOptimization
from sklearn.metrics import r2_score
import warnings
warnings.filterwarnings('ignore'

### LOAD THE DATA

In [4]:
data = pd.read_csv('test_table_Data_Science.csv',sep = ';')
data.shape

(1000000, 3)

### DATA UNDERSTANDING

In [5]:
data.head()

Unnamed: 0,t,s,top
0,16271,16256,0.999547
1,78842,34694,0.445013
2,131070,29548,0.228693
3,1458,415,0.318898
4,241583,87136,0.363435


In [6]:
data.tail()

Unnamed: 0,t,s,top
999995,71922224,32133776,0.44695
999996,155765148,57783963,0.371077
999997,741401475,417600756,0.56331
999998,668787009,564937317,0.844759
999999,356934856,258961921,0.725582


### DATATYPES AND NULL VALUES

In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000000 entries, 0 to 999999
Data columns (total 3 columns):
 #   Column  Non-Null Count    Dtype  
---  ------  --------------    -----  
 0   t       1000000 non-null  int64  
 1   s       1000000 non-null  int64  
 2   top     1000000 non-null  float64
dtypes: float64(1), int64(2)
memory usage: 22.9 MB


### STATSISTICS

In [8]:
data.describe(include='all')

Unnamed: 0,t,s,top
count,1000000.0,1000000.0,1000000.0
mean,249639300.0,124961000.0,0.500245
std,220238000.0,146317000.0,0.2887155
min,4.0,1.0,5.356996e-07
25%,67482480.0,19812810.0,0.2503714
50%,186338600.0,68972330.0,0.500252
75%,381959800.0,177709200.0,0.7503263
max,999562100.0,973267300.0,0.9999996


### CHECK DUPLICATES

In [20]:
data[data.duplicated()].sum()

t      0.0
s      0.0
top    0.0
dtype: float64

### TRAIN TEST SPLIT

In [32]:
X = data[['s', 't']] 
y = data['top'].values
from sklearn.model_selection import train_test_split
X_train,X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2, random_state=123)

### Converting data into Light GBM format

In [38]:
dtrain = lgb.Dataset(data=X_train, label=y_train)

### Hyperparameter Tuning

In [39]:
def lgb_r2_score(preds, dtrain):
    labels = dtrain.get_label()
    return 'r2', rmse(labels, preds), True

def hyp_lgbm(num_leaves, feature_fraction, bagging_fraction, max_depth, min_split_gain, min_child_weight):
      
        params = {'application':'regression','num_iterations': 1000,
                  'learning_rate':0.01, 'early_stopping_round':50,
                  'metric':'RMSE'} # Default parameters
        params["num_leaves"] = int(round(num_leaves))
        params['feature_fraction'] = max(min(feature_fraction, 1), 0)
        params['bagging_fraction'] = max(min(bagging_fraction, 1), 0)
        params['max_depth'] = int(round(max_depth))
        params['min_split_gain'] = min_split_gain
        params['min_child_weight'] = min_child_weight
        cv_results = lgb.cv(params, dtrain, nfold=10, seed=101,categorical_feature=[], stratified=False,
                            verbose_eval =None, feval=lgb_r2_score )
        # print(cv_results)
        return np.max(cv_results['r2-mean'])

In [40]:
def rmse(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.sqrt(np.mean(np.abs((y_true**2 - y_pred**2))))

In [41]:
# Domain space-- Range of hyperparameters 
pds = {'num_leaves': (5, 100),
          'feature_fraction': (0.1, 0.9),
          'bagging_fraction': (0.1, 1),
          'max_depth': (5,50),
          'min_split_gain': (0.001, 0.1),
          'min_child_weight': (5, 100)
          }

In [42]:
# Surrogate model
optimizer = BayesianOptimization(hyp_lgbm, pds, random_state=77)
                                  
# Optimize
optimizer.maximize(init_points=5, n_iter=15)

|   iter    |  target   | baggin... | featur... | max_depth | min_ch... | min_sp... | num_le... |
-------------------------------------------------------------------------------------------------
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
An

[LightGBM] [Info] Start training from score 0.500431
| [95m 2       [0m | [95m 0.4997  [0m | [95m 0.3935  [0m | [95m 0.5329  [0m | [95m 15.81   [0m | [95m 56.82   [0m | [95m 0.04065 [0m | [95m 72.94   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true`

| [95m 4       [0m | [95m 0.4997  [0m | [95m 0.1516  [0m | [95m 0.6976  [0m | [95m 25.35   [0m | [95m 21.7    [0m | [95m 0.005888[0m | [95m 32.79   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins

| [95m 6       [0m | [95m 0.4997  [0m | [95m 0.9758  [0m | [95m 0.5501  [0m | [95m 7.999   [0m | [95m 52.79   [0m | [95m 0.08944 [0m | [95m 7.826   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough,

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can 

| [0m 10      [0m | [0m 0.4997  [0m | [0m 0.8104  [0m | [0m 0.306   [0m | [0m 10.37   [0m | [0m 53.67   [0m | [0m 0.06197 [0m | [0m 5.561   [0m |
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_col_wise=true` to r

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can 

| [0m 14      [0m | [0m 0.4997  [0m | [0m 0.9223  [0m | [0m 0.3557  [0m | [0m 26.4    [0m | [0m 28.49   [0m | [0m 0.04614 [0m | [0m 5.68    [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can

[LightGBM] [Info] Start training from score 0.500517
[LightGBM] [Info] Start training from score 0.500431
| [0m 16      [0m | [0m 0.4997  [0m | [0m 0.578   [0m | [0m 0.3856  [0m | [0m 43.93   [0m | [0m 36.34   [0m | [0m 0.0668  [0m | [0m 67.04   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used 

| [0m 18      [0m | [0m 0.4997  [0m | [0m 0.8388  [0m | [0m 0.6805  [0m | [0m 42.45   [0m | [0m 47.86   [0m | [0m 0.06493 [0m | [0m 38.7    [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 720000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can

| [0m 20      [0m | [0m 0.4981  [0m | [0m 0.7801  [0m | [0m 0.807   [0m | [0m 48.35   [0m | [0m 72.65   [0m | [0m 0.01221 [0m | [0m 32.94   [0m |


In [43]:
optimizer.max

{'target': 0.499737439980753,
 'params': {'bagging_fraction': 0.19718906981980094,
  'feature_fraction': 0.1747720421578305,
  'max_depth': 9.755162704664823,
  'min_child_weight': 52.39938864298944,
  'min_split_gain': 0.07785973541803283,
  'num_leaves': 6.449988028275264}}

### Train LighGBM model with tuned parameter

In [46]:
params = {'num_iterations': 1000,'learning_rate':0.01, 'metric':'RMSE',
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'bagging_fraction': 0.19718906981980094,
  'feature_fraction': 0.1747720421578305,
  'max_depth': 10,
  'min_child_weight': 52.39938864298944,
  'min_split_gain': 0.07785973541803283,
  'num_leaves':6}

print('Starting training...')
# train
gbm = lgb.train(params, dtrain, categorical_feature=[])

Starting training...
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 800000, number of used features: 2
[LightGBM] [Info] Start training from score 0.500495














### Prediction

In [47]:
# predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)

In [None]:
rmse(y_test, y_pred)

# -------------------------------------------------------------------------------------------------

### Training the model on full data

In [53]:
dtrain = lgb.Dataset(data=X, label=y)

In [54]:
# Domain space-- Range of hyperparameters 
pds = {'num_leaves': (5, 100),
          'feature_fraction': (0.1, 0.9),
          'bagging_fraction': (0.1, 1),
          'max_depth': (5,50),
          'min_split_gain': (0.001, 0.1),
          'min_child_weight': (5, 100)
          }

In [55]:
# Surrogate model
optimizer = BayesianOptimization(hyp_lgbm, pds, random_state=77)
                                  
# Optimize
optimizer.maximize(init_points=5, n_iter=15)

|   iter    |  target   | baggin... | featur... | max_depth | min_ch... | min_sp... | num_le... |
-------------------------------------------------------------------------------------------------
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
An

[LightGBM] [Info] Start training from score 0.500182
[LightGBM] [Info] Start training from score 0.500260
[LightGBM] [Info] Start training from score 0.500165
| [95m 2       [0m | [95m 0.4995  [0m | [95m 0.3935  [0m | [95m 0.5329  [0m | [95m 15.81   [0m | [95m 56.82   [0m | [95m 0.04065 [0m | [95m 72.94   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Nu

[LightGBM] [Info] Start training from score 0.500165
| [95m 4       [0m | [95m 0.4995  [0m | [95m 0.1516  [0m | [95m 0.6976  [0m | [95m 25.35   [0m | [95m 21.7    [0m | [95m 0.005888[0m | [95m 32.79   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_col_wise=true`

| [95m 6       [0m | [95m 0.4995  [0m | [95m 0.4724  [0m | [95m 0.2945  [0m | [95m 6.299   [0m | [95m 56.46   [0m | [95m 0.04552 [0m | [95m 6.815   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough,

[LightGBM] [Info] Start training from score 0.500182
[LightGBM] [Info] Start training from score 0.500260
[LightGBM] [Info] Start training from score 0.500165
| [0m 8       [0m | [0m 0.4995  [0m | [0m 0.8983  [0m | [0m 0.1528  [0m | [0m 32.25   [0m | [0m 51.04   [0m | [0m 0.0492  [0m | [0m 73.37   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of 

[LightGBM] [Info] Start training from score 0.500260
[LightGBM] [Info] Start training from score 0.500165
| [0m 10      [0m | [0m 0.4995  [0m | [0m 0.8614  [0m | [0m 0.4667  [0m | [0m 40.04   [0m | [0m 41.69   [0m | [0m 0.07383 [0m | [0m 61.05   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used 

[LightGBM] [Info] Start training from score 0.500260
[LightGBM] [Info] Start training from score 0.500165
| [0m 12      [0m | [0m 0.4995  [0m | [0m 0.4096  [0m | [0m 0.2571  [0m | [0m 34.74   [0m | [0m 86.37   [0m | [0m 0.04057 [0m | [0m 49.03   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used 

| [95m 14      [0m | [95m 0.4995  [0m | [95m 0.9223  [0m | [95m 0.3557  [0m | [95m 26.4    [0m | [95m 28.49   [0m | [95m 0.04614 [0m | [95m 5.68    [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough,

[LightGBM] [Info] Start training from score 0.500182
[LightGBM] [Info] Start training from score 0.500260
[LightGBM] [Info] Start training from score 0.500165
| [0m 16      [0m | [0m 0.4995  [0m | [0m 0.578   [0m | [0m 0.3856  [0m | [0m 43.93   [0m | [0m 36.34   [0m | [0m 0.0668  [0m | [0m 67.04   [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of 

[LightGBM] [Info] Start training from score 0.500182
[LightGBM] [Info] Start training from score 0.500260
[LightGBM] [Info] Start training from score 0.500165
| [0m 18      [0m | [0m 0.4995  [0m | [0m 0.8388  [0m | [0m 0.6805  [0m | [0m 42.45   [0m | [0m 47.86   [0m | [0m 0.06493 [0m | [0m 38.7    [0m |
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 900000, number of used features: 2
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of 

[LightGBM] [Info] Start training from score 0.500245
[LightGBM] [Info] Start training from score 0.500227
[LightGBM] [Info] Start training from score 0.500182
[LightGBM] [Info] Start training from score 0.500260
[LightGBM] [Info] Start training from score 0.500165
| [0m 20      [0m | [0m 0.4995  [0m | [0m 0.6687  [0m | [0m 0.1078  [0m | [0m 6.363   [0m | [0m 62.17   [0m | [0m 0.06197 [0m | [0m 43.27   [0m |


In [56]:
optimizer.max

{'target': 0.4995235927672333,
 'params': {'bagging_fraction': 0.9222512864163701,
  'feature_fraction': 0.3556918956607501,
  'max_depth': 26.40392406591672,
  'min_child_weight': 28.489608260706905,
  'min_split_gain': 0.046141498535868235,
  'num_leaves': 5.6796559686850125}}

In [57]:
params = {'num_iterations': 1000,'learning_rate':0.01, 'metric':'RMSE',
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'bagging_fraction': 0.9222512864163701,
  'feature_fraction': 0.3556918956607501,
  'max_depth': 26,
  'min_child_weight': 28.489608260706905,
  'min_split_gain': 0.046141498535868235,
  'num_leaves':6}

print('Starting training...')
# train
gbm = lgb.train(params, dtrain, categorical_feature=[])

Starting training...
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 1000000, number of used features: 2
[LightGBM] [Info] Start training from score 0.500245










### Save Model

In [59]:
gbm.save_model('lgb_regressor.txt', num_iteration=gbm.best_iteration) 

<lightgbm.basic.Booster at 0x7fab010da8d0>

### Load Model

In [None]:
model = lgb.Booster(model_file='lgb_regressor.txt')

In [61]:
#import libraries
import numpy as np
from flask import Flask, render_template,request
import pickle#Initialize the flask App
app = Flask(__name__)
model = model = lgb.Booster(model_file='lgb_regressor.txt')

In [62]:
#default page of our web-app
@app.route('/')
def home():
    return render_template('index.html')

In [63]:
#To use the predict button in our web-app
@app.route('/predict',methods=['POST'])
def predict():
    #For rendering results on HTML GUI
    int_features = [float(x) for x in request.form.values()]
    final_features = [np.array(int_features)]
    prediction = model.predict(final_features)
    output = round(prediction[0], 2) 
    return render_template('index.html', prediction_text='Prediction of top :{}'.format(output))