In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 16:11:15 2020

@author: Kshitij
"""
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, confusion_matrix, classification_report

# use available dataset for classification
dataset = load_breast_cancer()

X = dataset.data
y = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 2/3, random_state = 1)

''' We will be using lighgbm api without sklearn so we will provide the params
 in a dict format'''
param = param = {'objective': 'binary', 
                 'learning_rate': 0.5,
                 'reg_alpha': 0.5, 
                 'reg_lambda': 0.5}

param['metric'] = 'auc'

# train the model
model = lgb.train(param, lgb.Dataset(X_train, label = y_train))

# make prediction on test dataset
# unlike sklearn classifier the 'predict' method gives probability in lightgbm
pred=model.predict(X_test)
# we get the y_pred with threshold at 0.5
y_pred = np.where(pred>0.5,1,0)

In [3]:
print(y_pred)

# see the f1 score: harmonic mean of precision and recall
f1_score(y_test, y_pred)

[1 0 1 0 0 0 0 0 1 1 1 0 0 1 1 1 1 1 1 0 1 1 0 1 0 1 1 0 0 0 0 1 0 0 1 1 0
 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0
 1 0 1 1 1 0 1 0 1 0 1 1 0 1 0 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1
 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 0 0 0 1 1 1 0 1 0 0 1 1 0 0 0 1 0 0 0 1 1
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1
 0 0 1 0 1]


0.9561752988047808

# Introduction to Bayesian Optimization

In [4]:
from hyperopt import  fmin, hp, tpe, Trials, STATUS_OK

def objective(params):
    
    h_model = lgb.train(params, lgb.Dataset(X_train, label = y_train), verbose_eval=False)
    pred    = h_model.predict(X_test)
    y_pred  = np.array(list(map(lambda x: int(x), pred>0.5)))
    f1sc = f1_score(y_test, y_pred)
    loss = 1 - f1sc
    
    return {'loss': loss, 'status' : STATUS_OK}

space = {
    'lambda_l1': hp.uniform('lambda_l1', 0.0, 1.0),
    'lambda_l2': hp.uniform("lambda_l2", 0.0, 1.0),
    'learning_rate' : hp.loguniform('learning_rate', np.log(0.05), np.log(0.25)),
    'objective' : 'binary',
    'metric' : 'auc',
    'verbose': -1
    }

trials = Trials()

# define the optimization function
best = fmin(fn=objective, 
            space=space, 
            algo=tpe.suggest, 
            trials= trials, 
            max_evals=100)
best

100%|██████████| 100/100 [00:13<00:00,  7.30trial/s, best loss: 0.028112449799196693]


{'lambda_l1': 0.29726010349464893,
 'lambda_l2': 0.42617329194394293,
 'learning_rate': 0.1193862634945627}

In [5]:
best

{'lambda_l1': 0.29726010349464893,
 'lambda_l2': 0.42617329194394293,
 'learning_rate': 0.1193862634945627}

In [None]:
# train the model on best parameter results
h_model = lgb.train(best, lgb.Dataset(X_train, label = y_train), verbose_eval=-1)

# get the y_pred
pred_h=h_model.predict(X_test)
y_predh = list(map(lambda x: int(x), pred_h>0.5))

f1_score(y_test, y_predh)

In [11]:
class Mlclass():
    '''Parameter Tuning Class tunes the LightGBM model with different   optimization techniques - Hyperopt, Optuna.'''
    def __init__(self, x_train, y_train):
        '''Initializes the Parameter tuning class and also initializes   LightGBM dataset object
        Parameters
        ----------
        x_train: data (string, numpy array, pandas DataFrame,or list of numpy arrays) – Data source of Dataset.
        y_train: label (list, numpy 1-D array, pandas Series / one-column DataFrame or None – Label of the data.'''
        self.x_train = x_train
        self.y_train = y_train
        self.train_set = lgb.Dataset(data=x_train, label=y_train)

    def tuning(self, optim_type):
        '''Method takes the optimization type and tunes the model'''
        #call the optim_type: Hyperopt or Optuna
        optimization = getattr(self, optim_type)
        return optimization()
  
    def hyperopt_method(self):
        # This method is called by tuning when user inputs 'hyperopt_method' while calling the tuning method
    
        #define the hyperopt space
        space = {'lambda_l1': hp.uniform('lambda_l1', 0.0, 1.0),
                 'lambda_l2': hp.uniform("lambda_l2", 0.0, 1.0),
                 'learning_rate' : hp.loguniform('learning_rate',
                                                 np.log(0.05), np.log(0.25)),
                 'objective' : 'binary',
                 'verbose': -1
                }
        # define algorithm and trials inside the class
        algo, trials= tpe.suggest, Trials()
        
        #Call the fmin from inside the class
        best = fmin(fn=objective,space=space,algo=algo,trials=trials,max_evals=100)
        
        self.params = best
        return best, trials
    
    def objective(self, params):
        # same objective function with added self
        h_model = lgb.train(params, lgb.Dataset(X_train, label = y_train))
        pred=h_model.predict(X_test)
        y_pred = np.array(list(map(lambda x: int(x), pred>0.5)))
        f1sc = f1_score(y_test, y_pred)
        loss = 1 - f1sc
        return {'loss': loss,'status' : STATUS_OK}

In [12]:
Obj = Mlclass(X_train, y_train)

In [13]:
Obj.tuning('hyperopt_method')

100%|██████████| 100/100 [00:15<00:00,  6.47trial/s, best loss: 0.02400000000000002]


({'lambda_l1': 0.05048971454466432,
  'lambda_l2': 0.37705506942576894,
  'learning_rate': 0.1374934083843963},
 <hyperopt.base.Trials at 0x7f4beb5a8b70>)

In [None]:
class Mlclass():
    '''Parameter Tuning Class tunes the LightGBM model with different   optimization techniques - Hyperopt, Optuna.'''
    def __init__(self, x_train, y_train):
        '''Initializes the Parameter tuning class and also initializes   LightGBM dataset object
        Parameters
        ----------
        x_train: data (string, numpy array, pandas DataFrame,or list of numpy arrays) – Data source of Dataset.
        y_train: label (list, numpy 1-D array, pandas Series / one-column DataFrame or None – Label of the data.'''
        self.x_train = x_train
        self.y_train = y_train
        self.train_set = lgb.Dataset(data=x_train, label=y_train)

    def tuning(self, optim_type):
        '''Method takes the optimization type and tunes the model'''
        #call the optim_type: Hyperopt or Optuna
        optimization = getattr(self, optim_type)
        return optimization()
  
    def hyperopt_method(self):
        # This method is called by tuning when user inputs 'hyperopt_method' while calling the tuning method
    
        #define the hyperopt space
        space = {'lambda_l1': hp.uniform('lambda_l1', 0.0, 1.0),
                 'lambda_l2': hp.uniform("lambda_l2", 0.0, 1.0),
                 'learning_rate' : hp.loguniform('learning_rate',
                                                 np.log(0.05), np.log(0.25)),
                 'objective' : 'binary'}
        # define algorithm and trials inside the class
        algo, trials= tpe.suggest, Trials()
        #Call the fmin from inside the class
        best = fmin(fn=objective,space=space,algo=algo,trials=trials,max_evals=1000)
        self.params = best
        return best, trials
    
    def objective(self, params):
        # same objective function with added self
        h_model = lgb.train(params, lgb.Dataset(X_train, label = y_train))
        pred=h_model.predict(X_test)
        y_pred = np.array(list(map(lambda x: int(x), pred>0.5)))
        f1sc = f1_score(y_test, y_pred)
        loss = 1 - f1sc
        return {'loss': loss,'status' : STATUS_OK}
    
    def optuna_method(self):
        study = optuna.create_study(direction='minimize')
        study.optimize(optuna_obj, n_trials=1000)
        self.params = study.best_params
        return study
    
    def optuna_obj(self, trial):
        '''Same optuna objective with parameters space inside the function for optuna optimization'''
        params = {'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
                  'lambda_l2': trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
                  'learning_rate' : trial.suggest_loguniform('learning_rate', 0.05, 0.25)}
        
        o_model = lgb.train(params, lgbo.Dataset(X_train, label = y_train))
        pred=o_model.predict(X_test)
        y_pred = np.array(list(map(lambda x: int(x), pred>0.5)))
        f1sc = f1_score(y_test, y_pred)
        loss = 1 - f1sc
        return loss

    def train(self):
        """This function evaluates the model on best parameters"""
        print("Model will be trained on the following parameters: \n{}".format(self.params))
        #train the model with best parameters
        self.gbm = lgb.train(self.params, self.train_set)
    def evaluate(self, x_test, y_test):
        # predict the values from x_test
        pred = self.gbm.predict(x_test)
        y_pred = np.where(pred>0.5,1,0)
        #print confusion matrix
        print(confusion_matrix(y_test,y_pred))
        #print classification report
        print(classification_report(y_test, y_pred))