## Import the Libraries

In [1]:
import lightgbm as lgb
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn import metrics
from sklearn.datasets import load_breast_cancer
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_uniform
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from scipy.stats import uniform as sp_uniform

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


## Download Dataset 

In [None]:
X, y = load_breast_cancer(return_X_y=True)

## Split Data 

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

## Set Parameters for LightGBM model

In [None]:
parameters={"early_stopping_rounds":20, 
            "eval_metric" : 'auc', 
            "eval_set" : [(X_test,y_test)],
            'eval_names': ['valid'],
            'verbose': 100,
            'categorical_feature': 'auto'}

## Create Parameters to Tune 

In [None]:
parameter_tuning ={
             'max_depth': sp_randint(10,50),
             'num_leaves': sp_randint(6, 50), 
             'learning_rate ': [0.1,0.01,0.001],
             'min_child_samples': sp_randint(100, 500), 
             'min_child_weight': [1e-5, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4],
             'subsample': sp_uniform(loc=0.2, scale=0.8), 
             'colsample_bytree': sp_uniform(loc=0.4, scale=0.6),
             'reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
             'reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100]}

## Model Training  

In [None]:
classifier = lgb.LGBMClassifier(random_state=300, silent=True, metric='None', n_jobs=4, n_estimators=5000)

find_parameters = RandomizedSearchCV(
    estimator=classifier, param_distributions=parameter_tuning, 
    n_iter=100,
    scoring='roc_auc',
    cv=5,
    refit=True,
    random_state=300,
    verbose=False)

## Fit Parameters

In [None]:
find_parameters.fit(X_train, y_train, **parameters)
print('Best score : {} with parameters: {} '.format(find_parameters.best_score_, find_parameters.best_params_))

Training until validation scores don't improve for 20 rounds.
Early stopping, best iteration is:
[35]	valid's auc: 0.977254
Training until validation scores don't improve for 20 rounds.
Early stopping, best iteration is:
[19]	valid's auc: 0.98847
Training until validation scores don't improve for 20 rounds.
Early stopping, best iteration is:
[7]	valid's auc: 0.976625
Training until validation scores don't improve for 20 rounds.
Early stopping, best iteration is:
[10]	valid's auc: 0.984067
Training until validation scores don't improve for 20 rounds.
Early stopping, best iteration is:
[28]	valid's auc: 0.983333
Training until validation scores don't improve for 20 rounds.
Early stopping, best iteration is:
[40]	valid's auc: 0.980608
Training until validation scores don't improve for 20 rounds.
Early stopping, best iteration is:
[61]	valid's auc: 0.984067
Training until validation scores don't improve for 20 rounds.
[100]	valid's auc: 0.984696
Early stopping, best iteration is:
[96]	vali

In [None]:
best_parameters = find_parameters.best_params_
best_parameters

{'colsample_bytree': 0.7616621541406502,
 'learning_rate ': 0.1,
 'max_depth': 24,
 'min_child_samples': 152,
 'min_child_weight': 1,
 'num_leaves': 18,
 'reg_alpha': 2,
 'reg_lambda': 0.1,
 'subsample': 0.6638319967141887}

In [None]:
best_parameters_model = lgb.LGBMClassifier(**best_parameters)
best_parameters_model.set_params(**best_parameters)

LGBMClassifier(boosting_type='gbdt', class_weight=None,
               colsample_bytree=0.7616621541406502, importance_type='split',
               learning_rate=0.1, learning_rate =0.1, max_depth=24,
               min_child_samples=152, min_child_weight=1, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_leaves=18, objective=None,
               random_state=None, reg_alpha=2, reg_lambda=0.1, silent=True,
               subsample=0.6638319967141887, subsample_for_bin=200000,
               subsample_freq=0)