In [1]:
import numpy as np
import matplotlib.pyplot as plt
# AdaBoost Algorithm
from sklearn.ensemble import AdaBoostClassifier
# Gradient Boosting 
from sklearn.ensemble import GradientBoostingClassifier

plt.rcParams['font.size'] = 14

In [2]:
# XGBoost 
import xgboost
from xgboost import XGBClassifier
from xgboost import plot_importance, to_graphviz, plot_tree
print("XGBoost version:",xgboost.__version__)

XGBoost version: 1.5.0


  from pandas import MultiIndex, Int64Index


In [3]:
np.random.seed(123)

CASE = 1

S = 2
N = 2000
N_train = 1000
N_test  = N - N_train
x = S*(2*np.random.rand(N,2)-1)
y = np.zeros(N).astype(int)

for n in range(N):
    if CASE==1:
        if x[n,1]<-0.6 and x[n,0]>-0.2: y[n]=1
        if x[n,1]>0.4 and x[n,0]<-0.8: y[n]=1
        if x[n,1]>1.0 and x[n,0]>0.8: y[n]=1
    elif CASE==2:
        if x[n,1]<0 and x[n,0]>0.5: y[n]=1
        if x[n,1]>0 and np.sqrt((x[n,0]+0.3)**2+x[n,1]**2)<1.5: y[n]=1

x_train,y_train = x[:N_train],y[:N_train]
x_test,y_test = x[N_train:],y[N_train:]

In [4]:
# decorater used to block function printing to the console
import os, sys
def blockPrinting(func):
    def func_wrapper(*args, **kwargs):
        # block all printing to the console
        sys.stdout = open(os.devnull, 'w')
        # call the method in question
        value = func(*args, **kwargs)
        # enable all printing to the console
        sys.stdout = sys.__stdout__
        # pass the return value of the method back
        return value

    return func_wrapper

In [5]:
@blockPrinting
def classify(clf=GradientBoostingClassifier(),show=False):
    # GradientBoostingClassifier():
    #   n_estimators = 100 (default)
    #   loss function = deviance(default) used in Logistic Regression
    # XGBClassifier()
    #   n_estimators = 100 (default)
    #   max_depth = 3 (default)
    clf.fit(x_train,y_train)
    y_hat = clf.predict(x_test)
    err = 100*(1-clf.score(x_test, y_test))
    #if CASE<10: print("errors: {:.2f}%".format(err))

    if show:    
        dx = 0.02
        x_seq=np.arange(-S,S+dx,dx)
        nx = len(x_seq)
        x_plot=np.zeros((nx*nx,2))
        q=0
        for i in range(nx):
            for j in range(nx):
                x_plot[q,:] = [x_seq[i],x_seq[j]]
                q+=1
        y_plot= clf.predict(x_plot)
        plt.figure(figsize=(6,6))
        plt.title(str(clf))

        scat(x_plot,y_plot,cmap="winter",s=1)
        scat(x_train,y_train,s=7)
        plt.show()
    
#     if show:      
#         dump_list = clf.get_booster().get_dump()
#         num_trees = len(dump_list)
#         print("num_trees=",num_trees)
        
#         fig, AX = plt.subplots(3,1,figsize=(30, 30))
#         for i in range(min(3,num_trees)):
#             ax=AX[i]
#             plot_tree(clf, num_trees=i, ax=ax)
#         fig.savefig("DATA/tree-classif.png", dpi=300, pad_inches=0.02)   
#         plt.show()

    return err

<b>For the labeling of simple two dimensional data (as the one generated during the lesson), try different
parameters (gamma, lambda, n_estimators, ...), aiming to find the simplest yet effective XGBoost
model that keeps a good accuracy.</b>

In [6]:
# The defaults for XGBClassifier are:
#     max_depth=3
#     learning_rate=0.1
#     n_estimators=100
#     silent=True
#     objective='binary:logistic'
#     booster='gbtree'
#     n_jobs=1
#     nthread=None
#     gamma=0
#     min_child_weight=1
#     max_delta_step=0
#     subsample=1
#     colsample_bytree=1
#     colsample_bylevel=1
#     reg_alpha=0
#     reg_lambda=1
#     scale_pos_weight=1
#     base_score=0.5
#     random_state=0
#     seed=None
#     missing=None
import warnings
warnings.filterwarnings("ignore")

## learning_rate

In [7]:
err_ = []
lr_ = [1e-4, 0.001, 0.01, 0.1, 1, 10]
print('!!! Here we plot the results:')
for lr in lr_:
    
    err_.append(classify(XGBClassifier(seed=1, learning_rate=lr),show=False))
    
for idx, lr in enumerate(lr_):
    print(f'learning rate {lr} err = {round(err_[idx], 2)} %')

!!! Here we plot the results:
learning rate 0.0001 err = 0.2 %
learning rate 0.001 err = 0.2 %
learning rate 0.01 err = 0.2 %
learning rate 0.1 err = 0.3 %
learning rate 1 err = 0.5 %
learning rate 10 err = 0.2 %
n_estimators 1 err = 0.2 %
n_estimators 10 err = 0.2 %
n_estimators 50 err = 0.2 %
n_estimators 100 err = 0.2 %
n_estimators 200 err = 0.2 %
n_estimators 1000 err = 0.3 %
gamma 0.0 err = 0.2 %
gamma 1.0 err = 0.2 %
gamma 2.0 err = 0.2 %
gamma 4.0 err = 0.2 %
gamma 10.0 err = 0.2 %
gamma 20.0 err = 0.3 %
gamma 40.0 err = 0.3 %
gamma 100.0 err = 1.2 %
gamma 200.0 err = 29.8 %
lambda 100.0 err = 1.4 %
lambda 30 err = 0.7 %
lambda 20.0 err = 0.4 %
lambda 10.0 err = 0.3 %
lambda 5.0 err = 0.2 %
lambda 1.0 err = 0.2 %
lambda 0.5 err = 0.2 %
lambda 0.1 err = 0.2 %


Best learning rate is for 0.2%, so either 0.0001, 0.001, 0.01 or 10. We are using 0.01 for next experiments.

## n_estimators

In [8]:
err_ = []
par_ = [1, 10, 50, 100, 200, 1000]
#par_ = [1, 2, 5, 10]
for par in par_:
    err_.append(classify(XGBClassifier(seed=1, n_estimators=par, learning_rate=0.01),show=False))
    
for idx, par in enumerate(par_):
    print(f'n_estimators {par} err = {round(err_[idx], 4)} %')

Error does not change when varying n_estimators.

##  gamma

In [9]:
err_ = []
par_ = [0.,1.,2.,4.,10.,20.,40.,100.,200.]
for par in par_:
    err_.append(classify(XGBClassifier(seed=1, learning_rate=0.01, gamma=par),show=False))
    
for idx, par in enumerate(par_):
    print(f'gamma {par} err = {round(err_[idx], 2)} %')

gamma 0.0 = 0.2 %
gamma 1.0 = 0.2 %
gamma 2.0 = 0.2 %
gamma 4.0 = 0.2 %
gamma 10.0 = 0.2 %
gamma 20.0 = 0.3 %
gamma 40.0 = 0.3 %
gamma 100.0 = 1.2 %
gamma 200.0 = 29.8 %

Small gamma leads to best results.

## lambda

In [10]:
err_ = []
par_ = [100.,30,20.,10.,5.,1.,0.5,0.1]
for par in par_:
    err_.append(classify(XGBClassifier(seed=1, learning_rate=0.01, gamma=1, reg_lambda=par),show=False))
    
for idx, par in enumerate(par_):
    print(f'lambda {par} err = {round(err_[idx], 2)} %')

lambda 100.0 = 1.4 %
lambda 30 = 0.7 %
lambda 20.0 = 0.4 %
lambda 10.0 = 0.3 %
lambda 5.0 = 0.2 %
lambda 1.0 = 0.2 %
lambda 0.5 = 0.2 %
lambda 0.1 = 0.2 %
Also, small reg_lambda leads to best results.