<a href="https://colab.research.google.com/github/ksricharank/confidence/blob/master/XGBoost_custom.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install --upgrade xgboost

Collecting xgboost
[?25l  Downloading https://files.pythonhosted.org/packages/29/c8/d6ff9fe39cb93ef2e66bb411c69e0135dd1b1ea98827eadbb2ba8baecb44/xgboost-1.3.1-py3-none-manylinux2010_x86_64.whl (157.5MB)
[K     |████████████████████████████████| 157.5MB 92kB/s 
Installing collected packages: xgboost
  Found existing installation: xgboost 0.90
    Uninstalling xgboost-0.90:
      Successfully uninstalled xgboost-0.90
Successfully installed xgboost-1.3.1


In [1]:
import xgboost as xgb
xgb.__version__

'1.3.1'

In [72]:
import autograd.numpy as np
from autograd import elementwise_grad as egrad
from autograd import grad
from autograd import hessian as hess
from autograd import jacobian
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder
import xgboost as xgb

iris = datasets.load_iris()
X, y = iris.data, iris.target
dtrain = xgb.DMatrix(X, label=y)

def softmax_matrix(X):
    '''Row wise softmax function with X as input matrix.'''
    e = np.exp(X)
    return e / np.tile(np.sum(e, axis=1, keepdims=True),[1,X.shape[1]])

#print(softmax_matrix(np.array([[1,2,4],[3,4,3]])))

# builtin
params = {'objective': 'multi:softprob', 'num_class': 3}
model_builtin = xgb.train(params, dtrain, num_boost_round = 1)
preds_builtin = model_builtin.predict(dtrain)

# custom
def obj(preds, dtrain):
    labels = dtrain.get_label().reshape(-1, 1)
    labels = OneHotEncoder(sparse=False, categories='auto').fit_transform(labels)
    preds = softmax_matrix(preds)
    grad = preds - labels
    hess = 2.0 * preds * (1.0 - preds)
    return grad.flatten(), hess.flatten()

# custom w autograd
def obj_w_autograd(output, dtrain):
    labels = dtrain.get_label().reshape(-1, 1)
    labels = OneHotEncoder(sparse=False, categories='auto').fit_transform(labels)

    def training_loss(output_vector):
      # Training loss is the negative log-likelihood of the training labels.
      output_matrix = output_vector.reshape((-1,3))
      preds = softmax_matrix(output_matrix)
      return -np.sum(np.sum(labels*np.log(preds+1e-10),axis=1))

    def gradient_analytical(output_vector):
      output_matrix = output_vector.reshape((-1,3))
      preds = softmax_matrix(output_matrix)
      return preds.flatten()

    gradient_fun = grad(training_loss)
    hessian_fun = jacobian(gradient_fun)
    #hessian_fun = jacobian(gradient_analytical)
    
    gradient = gradient_fun(output.flatten())
    hessian = 2*np.diagonal(hessian_fun(output.flatten()))
    hessian = hessian.copy(order='C')
    print(gradient.shape, hessian.shape)

    return gradient, hessian

params = {'num_class': 3}
model_custom = xgb.train(params, dtrain, num_boost_round = 1, obj = obj_w_autograd)
preds_custom = model_custom.predict(dtrain, output_margin=True)
preds_custom = softmax_matrix(preds_custom)

# assert approaches give same results
assert np.sum(np.abs(preds_custom - preds_builtin)) <= 1e-4

(450,) (450,)


In [69]:
print(obj_w_autograd(preds_custom, dtrain))

(450,) (450,)
(array([-0.61301194,  0.3066857 ,  0.30632619, -0.61301194,  0.3066857 ,
        0.30632619, -0.61301194,  0.3066857 ,  0.30632619, -0.61301194,
        0.3066857 ,  0.30632619, -0.61301194,  0.3066857 ,  0.30632619,
       -0.61301194,  0.3066857 ,  0.30632619, -0.61301194,  0.3066857 ,
        0.30632619, -0.61301194,  0.3066857 ,  0.30632619, -0.61301194,
        0.3066857 ,  0.30632619, -0.61301194,  0.3066857 ,  0.30632619,
       -0.61301194,  0.3066857 ,  0.30632619, -0.61301194,  0.3066857 ,
        0.30632619, -0.61301194,  0.3066857 ,  0.30632619, -0.61301194,
        0.3066857 ,  0.30632619, -0.61301194,  0.3066857 ,  0.30632619,
       -0.61301194,  0.3066857 ,  0.30632619, -0.61301194,  0.3066857 ,
        0.30632619, -0.61301194,  0.3066857 ,  0.30632619, -0.61301194,
        0.3066857 ,  0.30632619, -0.61301194,  0.3066857 ,  0.30632619,
       -0.61301194,  0.3066857 ,  0.30632619, -0.61301194,  0.3066857 ,
        0.30632619, -0.61301194,  0.3066857 ,  0.

In [70]:
print(obj(preds_custom, dtrain))

(array([-0.61301193,  0.30668572,  0.30632621, -0.61301193,  0.30668572,
        0.30632621, -0.61301193,  0.30668572,  0.30632621, -0.61301193,
        0.30668572,  0.30632621, -0.61301193,  0.30668572,  0.30632621,
       -0.61301193,  0.30668572,  0.30632621, -0.61301193,  0.30668572,
        0.30632621, -0.61301193,  0.30668572,  0.30632621, -0.61301193,
        0.30668572,  0.30632621, -0.61301193,  0.30668572,  0.30632621,
       -0.61301193,  0.30668572,  0.30632621, -0.61301193,  0.30668572,
        0.30632621, -0.61301193,  0.30668572,  0.30632621, -0.61301193,
        0.30668572,  0.30632621, -0.61301193,  0.30668572,  0.30632621,
       -0.61301193,  0.30668572,  0.30632621, -0.61301193,  0.30668572,
        0.30632621, -0.61301193,  0.30668572,  0.30632621, -0.61301193,
        0.30668572,  0.30632621, -0.61301193,  0.30668572,  0.30632621,
       -0.61301193,  0.30668572,  0.30632621, -0.61301193,  0.30668572,
        0.30632621, -0.61301193,  0.30668572,  0.30632621, -0.6

In [22]:
print(preds_builtin[:10], preds_custom[:10])

[[0.4887712  0.2562008  0.25502795]
 [0.4887712  0.2562008  0.25502795]
 [0.4887712  0.2562008  0.25502795]
 [0.4887712  0.2562008  0.25502795]
 [0.4887712  0.2562008  0.25502795]
 [0.4887712  0.2562008  0.25502795]
 [0.4887712  0.2562008  0.25502795]
 [0.4887712  0.2562008  0.25502795]
 [0.4887712  0.2562008  0.25502795]
 [0.4887712  0.2562008  0.25502795]] [[0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]
 [0.33333334 0.33333334 0.33333334]]


In [None]:
sigmoid(0.5)

0.6224593312018546