In [4]:
import numpy as np
import xgboost as xgb
from sklearn import metrics

In [16]:
# Load the dermatology data set. Two columns go through a converter:
#   - column 33 becomes a 0/1 flag that is 1 when the raw field equals '?'
#   - column 34 (the class label) is shifted down by one, because labels
#     have to lie in the range 0 .. num_class - 1
data = np.loadtxt(
    './data/dermatology.data',
    delimiter=',',
    converters={
        33: lambda x: int(x == '?'),
        34: lambda x: int(x) - 1,
    },
)
sz = data.shape

# The first 70% of the rows (in file order) are used for training, the rest for testing.
split_at = int(sz[0] * 0.7)
train, test = data[:split_at, :], data[split_at:, :]

# Columns 0..32 are the features; column 33 is left out and column 34 is the label.
train_X, train_Y = train[:, :33], train[:, 34]
test_X, test_Y = test[:, :33], test[:, 34]

xg_train = xgb.DMatrix(train_X, label=train_Y)
xg_test = xgb.DMatrix(test_X, label=test_Y)
# Booster configuration for the first run (hard class labels).
param = {
    # multi-class softmax objective: predict() returns one class label per row
    'objective': 'multi:softmax',
    # learning rate (step-size shrinkage applied after each boosting round)
    'eta': 0.1,
    # maximum depth of each tree
    'max_depth': 6,
    # NOTE(review): 'silent' is presumably the quiet-mode flag of the xgboost
    # release this was written for; confirm it is still accepted by the installed version
    'silent': 1,
    # number of worker threads
    'nthread': 4,
    # number of distinct class labels (0 .. 5)
    'num_class': 6,
}

# Number of boosting rounds, and the data sets whose error is reported after each round.
num_round = 5
watchlist = [(xg_train, 'train'), (xg_test, 'test')]

bst = xgb.train(param, xg_train, num_boost_round=num_round, evals=watchlist)

# Predict a class label for every held-out row.
pred = bst.predict(xg_test)
print("pred: %s" % pred)

# Error rate = fraction of rows whose predicted label (truncated to an integer)
# differs from the true label.
error_rate = float(np.mean(pred.astype(int) != test_Y))
print('predicting, classification error=%f' % error_rate)

# Cross-check with scikit-learn. Written as a print() call so the cell parses
# under both Python 2 and Python 3, like every other print in this cell.
print(metrics.accuracy_score(test_Y, pred))

# Train again with the same settings, but ask for per-class probabilities
# instead of hard labels.
param['objective'] = 'multi:softprob'
bst = xgb.train(param, xg_train, num_round, watchlist)

# According to the original note, older xgboost releases return the probabilities
# as a flat 1-D array, while the output recorded for this cell already shows a
# 2-D shape. Reshaping to (ndata, nclass) covers both cases.
yprob = bst.predict(xg_test)
# Format the shape into the string so the line prints the same under Python 2 and 3.
print("yprob:: %s" % (yprob.shape,))
# Take the class count from the training parameters instead of repeating the literal.
yprob = yprob.reshape(test_Y.shape[0], param['num_class'])
# The predicted label is the index of the most probable class in each row.
ylabel = np.argmax(yprob, axis=1)

print("prob: %s" % yprob[0])
print("ylabel: %s" % ylabel)

# Error rate = fraction of rows whose most probable class differs from the true label.
error_rate = float(np.mean(ylabel != test_Y))
print('predicting, classification error=%f' % error_rate)

[0]	train-merror:0.011719	test-merror:0.127273
[1]	train-merror:0.015625	test-merror:0.127273
[2]	train-merror:0.011719	test-merror:0.109091
[3]	train-merror:0.007812	test-merror:0.081818
[4]	train-merror:0.007812	test-merror:0.090909
pred: [ 3.  1.  3.  3.  3.  1.  0.  1.  1.  4.  5.  5.  5.  1.  3.  1.  0.  0.
  0.  0.  0.  1.  1.  3.  3.  1.  0.  0.  1.  1.  1.  2.  2.  2.  2.  0.
  0.  0.  0.  4.  4.  4.  4.  4.  2.  2.  2.  3.  0.  0.  3.  3.  3.  0.
  0.  0.  2.  2.  2.  2.  2.  0.  0.  0.  0.  3.  3.  0.  0.  3.  2.  2.
  1.  0.  0.  3.  3.  4.  4.  0.  0.  4.  4.  2.  0.  4.  4.  5.  5.  3.
  1.  5.  5.  5.  0.  0.  0.  4.  4.  0.  0.  0.  0.  1.  1.  3.  3.  2.
  2.  0.]
predicting, classification error=0.090909
0.909090909091
[0]	train-merror:0.011719	test-merror:0.127273
[1]	train-merror:0.015625	test-merror:0.127273
[2]	train-merror:0.011719	test-merror:0.109091
[3]	train-merror:0.007812	test-merror:0.081818
[4]	train-merror:0.007812	test-merror:0.090909
('yprob::', (110, 6