# Multi-Class classification with XGBoost

In [1]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_wine
print ("Packages loaded.")

Packages loaded.


### Load wine data

In [2]:
data = load_wine()
X,Y,class_names = data.data,data.target,data.target_names
print ("X:%s Y:%s"%(X.shape,Y.shape))
print ("class_names:%s"%(class_names))

X:(178, 13) Y:(178,)
class_names:['class_0' 'class_1' 'class_2']


In [3]:
print (X)
print (Y)

[[1.423e+01 1.710e+00 2.430e+00 ... 1.040e+00 3.920e+00 1.065e+03]
 [1.320e+01 1.780e+00 2.140e+00 ... 1.050e+00 3.400e+00 1.050e+03]
 [1.316e+01 2.360e+00 2.670e+00 ... 1.030e+00 3.170e+00 1.185e+03]
 ...
 [1.327e+01 4.280e+00 2.260e+00 ... 5.900e-01 1.560e+00 8.350e+02]
 [1.317e+01 2.590e+00 2.370e+00 ... 6.000e-01 1.620e+00 8.400e+02]
 [1.413e+01 4.100e+00 2.740e+00 ... 6.100e-01 1.600e+00 5.600e+02]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]


### Split data into train and test

In [4]:
seed = 1 # random seed
test_size = 0.33
X_train,X_test,Y_train,Y_test = train_test_split(
    X, Y, test_size=test_size, random_state=seed)
print ("X_train:%s X_test:%s"%(X_train.shape,X_test.shape))

X_train:(119, 13) X_test:(59, 13)


### Fit XGBoost 

In [5]:
model = XGBClassifier(
    num_class=3,
    use_label_encoder=False,
    max_depth=6,
    objective='multi:softmax',
    eval_metric='mlogloss')
ret = model.fit(X_train,Y_train)
print ("Train done.")
print (ret)

Train done.
XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eval_metric='mlogloss',
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=16,
              num_class=3, num_parallel_tree=1, objective='multi:softprob',
              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=None,
              subsample=1, tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)


### Evaluate

In [6]:
Y_pred = model.predict(X_test)
predictions = [round(value) for value in Y_pred]
# evaluate predictions
accuracy = accuracy_score(Y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 96.61%


### Hyper-parameter search

In [7]:
etas =[0.1,0.3,0.5] # step size shrinkage
max_depths = [2,3,4,5,6] # maximum depth of a tree
for eta in etas:
    for max_depth in max_depths:
        model = XGBClassifier(
            num_class=3,
            use_label_encoder=False,
            eta=eta,
            max_depth=max_depth,
            objective='multi:softmax',
            eval_metric='mlogloss')
        ret = model.fit(X_train,Y_train) # train
        Y_pred = model.predict(X_test) # test
        predictions = [round(value) for value in Y_pred] 
        accuracy = accuracy_score(Y_test, predictions)
        print("eta:[%.1f] max_depth:[%d] => accuracy: %.2f%%" % 
              (eta,max_depth,accuracy * 100.0))

eta:[0.1] max_depth:[2] => accuracy: 98.31%
eta:[0.1] max_depth:[3] => accuracy: 96.61%
eta:[0.1] max_depth:[4] => accuracy: 96.61%
eta:[0.1] max_depth:[5] => accuracy: 96.61%
eta:[0.1] max_depth:[6] => accuracy: 96.61%
eta:[0.3] max_depth:[2] => accuracy: 98.31%
eta:[0.3] max_depth:[3] => accuracy: 96.61%
eta:[0.3] max_depth:[4] => accuracy: 96.61%
eta:[0.3] max_depth:[5] => accuracy: 96.61%
eta:[0.3] max_depth:[6] => accuracy: 96.61%
eta:[0.5] max_depth:[2] => accuracy: 98.31%
eta:[0.5] max_depth:[3] => accuracy: 96.61%
eta:[0.5] max_depth:[4] => accuracy: 96.61%
eta:[0.5] max_depth:[5] => accuracy: 96.61%
eta:[0.5] max_depth:[6] => accuracy: 96.61%
