# Plot XGBoost Decision Tree
Plotting individual trees from a trained XGBoost model is useful for understanding how each decision tree is formed, including which features are used for splitting and what the split decisions are.

In [1]:
from numpy import loadtxt
from xgboost import XGBClassifier
from xgboost import plot_tree
from matplotlib import pyplot

## Load data

In [2]:
# Source: Pima Indians Diabetes dataset, https://archive.ics.uci.edu/ml/datasets/Pima+Indians+Diabetes
# Read the comma-separated file into a NumPy array with NumPy's loadtxt().
dataset = loadtxt(
    fname='pima-indians-diabetes.csv',
    delimiter=",",
)

## Separate into X (features) and y (label)

In [3]:
# Columns 0-7 are the input features; column 8 is the label to predict.
X, y = dataset[:, :8], dataset[:, 8]

## Build the model using the entire dataset

In [4]:
# Create the classifier with the library's default hyperparameters and train it on
# every row of X / y (no train/test split; this notebook only visualises the trees).
model = XGBClassifier()
# Kept as the cell's last expression so the notebook echoes the fitted estimator and its settings.
model.fit(X, y)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

## Plot decision tree

In [5]:
# Select matplotlib's 'notebook' backend for the plots below.
# NOTE(review): this backend targets the classic Notebook front end; JupyterLab / Notebook 7
# may need `%matplotlib widget` (ipympl) or `%matplotlib inline` instead - confirm for your setup.
%matplotlib notebook

In [6]:
# Prerequisite: plot_tree needs the 'graphviz' module to be installed, e.g. on Debian/Ubuntu:
#   sudo apt-get install python3-graphviz
# No tree index is passed here, so plot_tree uses its default
# (the first boosted tree, index 0, per the xgboost docs).
plot_tree(model)
pyplot.show()

<IPython.core.display.Javascript object>

In [7]:
# To plot a specific decision tree within an XGBoost model, pass its index via the num_trees argument,
# e.g. num_trees=4 plots the 5th boosted tree in the sequence (remember: indices start at 0).
plot_tree(model, num_trees=4)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f1db3dd1b38>

In [8]:
# To lay the tree out left-to-right, pass rankdir='LR' (the default, 'UT', draws it top-to-bottom).
plot_tree(model, num_trees=0, rankdir='LR')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x7f1db3d94cf8>