# XGBoost Monotonic Trend Validation
This notebook shows an example of checking that monotonic trends have been applied successfully in an xgboost model.

<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#XGBoost-Monotonic-Trend-Validation" data-toc-modified-id="XGBoost-Monotonic-Trend-Validation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>XGBoost Monotonic Trend Validation</a></span></li><li><span><a href="#Build-example-xgboost-model" data-toc-modified-id="Build-example-xgboost-model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Build example xgboost model</a></span><ul class="toc-item"><li><span><a href="#Load-data-from-sklearn" data-toc-modified-id="Load-data-from-sklearn-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Load data from sklearn</a></span></li><li><span><a href="#Create-xgboost-matrix" data-toc-modified-id="Create-xgboost-matrix-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Create xgboost matrix</a></span></li><li><span><a href="#Set-up-monotonic-features-for-training" data-toc-modified-id="Set-up-monotonic-features-for-training-2.3"><span class="toc-item-num">2.3&nbsp;&nbsp;</span>Set up monotonic features for training</a></span></li><li><span><a href="#Build-model" data-toc-modified-id="Build-model-2.4"><span class="toc-item-num">2.4&nbsp;&nbsp;</span>Build model</a></span></li><li><span><a href="#Check-feature-importance" data-toc-modified-id="Check-feature-importance-2.5"><span class="toc-item-num">2.5&nbsp;&nbsp;</span>Check feature importance</a></span></li></ul></li><li><span><a href="#Check-monotonic-trends-have-been-implemented-correctly-in-the-model" data-toc-modified-id="Check-monotonic-trends-have-been-implemented-correctly-in-the-model-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Check monotonic trends have been implemented correctly in the model</a></span><ul class="toc-item"><li><span><a href="#Get-tree-node-predictions" data-toc-modified-id="Get-tree-node-predictions-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Get tree node predictions</a></span></li><li><span><a href="#Check-monotonic-trends" 
data-toc-modified-id="Check-monotonic-trends-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Check monotonic trends</a></span></li></ul></li></ul></div>

In [1]:
import xgboost as xgb
import pandas as pd
from sklearn.datasets import load_boston

In [2]:
import pygbmexpl.xgb.parser as xgb_parser
import pygbmexpl.xgb.validate as xgb_validate

# Build example xgboost model

## Load data from sklearn

In [3]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs scikit-learn < 1.2 - confirm the pinned version.
boston = load_boston()

## Create xgboost matrix

In [4]:
# Wrap the features and target in a DMatrix, passing the column names through so
# the trained trees refer to features by name.
features, target = boston['data'], boston['target']
xgb_data = xgb.DMatrix(
    data=features,
    label=target,
    feature_names=boston['feature_names'],
)

## Set up monotonic features for training

In [5]:
# Start with every feature unconstrained (0); the scalar is broadcast across the index.
monotonic_constraints = pd.Series(0, index=boston['feature_names'])

In [6]:
# Features constrained to a decreasing trend (-1).
decreasing_mask = monotonic_constraints.index.isin(['LSTAT', 'CRIM'])
monotonic_constraints.loc[decreasing_mask] = -1

In [7]:
# Features constrained to an increasing trend (+1).
increasing_mask = monotonic_constraints.index.isin(['RM', 'CHAS'])
monotonic_constraints.loc[increasing_mask] = 1

In [8]:
# Show the constraint assigned to each feature (-1 decreasing, 0 none, 1 increasing).
monotonic_constraints

CRIM      -1
ZN         0
INDUS      0
CHAS       1
NOX        0
RM         1
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT     -1
dtype: int64

In [9]:
# Preview the positional tuple form that is passed as 'monotone_constraints' when training.
tuple(monotonic_constraints)

(-1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, -1)

## Build model

In [10]:
# Train a small boosted model with the monotonic constraints applied.
# 'monotone_constraints' is positional: one entry per feature, in the same order as
# the feature names given to the DMatrix.
# 'verbosity': 0 replaces the deprecated 'silent': 1 flag, which current xgboost
# releases no longer recognise (they report it as an unused parameter).
model = xgb.train(
    params={
        'verbosity': 0,
        'max_depth': 3,
        'monotone_constraints': tuple(monotonic_constraints),
    },
    dtrain=xgb_data,
    num_boost_round=100,
)

## Check feature importance
Using the default importance measure: the number of times each variable is used to split.

In [11]:
# Called with no arguments, so the default importance type is used.
model.get_score()

{'LSTAT': 24,
 'RM': 39,
 'DIS': 145,
 'NOX': 53,
 'CRIM': 53,
 'TAX': 29,
 'PTRATIO': 30,
 'B': 88,
 'AGE': 123,
 'INDUS': 30,
 'RAD': 16,
 'ZN': 9,
 'CHAS': 2}

# Check monotonic trends have been implemented correctly in the model

## Get tree node predictions

In [12]:
# Parse the trained booster into a DataFrame of tree nodes; the columns returned
# can be seen in the tree_df.head() output further down.
tree_df = xgb_parser.extract_model_predictions(model)

In [13]:
# (rows, columns) of the parsed node table.
tree_df.shape

(1382, 15)

In [14]:
# Number of distinct trees parsed; expected to match num_boost_round used in training (100).
tree_df['tree'].nunique()

100

In [15]:
# Peek at the first few parsed nodes.
tree_df.head()

Unnamed: 0,tree,nodeid,depth,yes,no,missing,split,split_condition,leaf,gain,cover,node_type,H,G,weight
0,0,0,0,1.0,2.0,1.0,LSTAT,9.725,,18247.6094,506,internal,506,-3275.224696,6.472776
1,0,1,1,3.0,4.0,3.0,RM,6.941,,6860.23438,212,internal,212,-1807.070584,8.523918
2,0,2,1,5.0,6.0,5.0,LSTAT,16.084999,,2385.59375,294,internal,294,-1468.154113,4.993721
3,0,3,2,7.0,8.0,7.0,DIS,1.48495,,203.609375,142,internal,142,-1031.574769,7.264611
4,0,4,2,9.0,10.0,9.0,RM,7.437,,713.554688,70,internal,70,-775.495815,11.078512


## Check monotonic trends

In [16]:
# Derive the constraints to validate from the same Series used for training, rather
# than re-typing them, so the check cannot drift from what the model was given.
# Unconstrained features (0) are dropped; values are cast to plain Python ints.
constraints_to_check = {
    feature: int(direction)
    for feature, direction in monotonic_constraints.items()
    if direction != 0
}
monotonic_constraint_check = xgb_validate.validate_monotonic_constraints_df(
    trees_df=tree_df,
    constraints=constraints_to_check,
)

In [17]:
# Summary result: one boolean per constrained variable.
# NOTE(review): presumably True only when every split on that variable respects the
# requested direction - confirm against the pygbmexpl documentation.
monotonic_constraint_check

variable
CHAS     True
CRIM     True
LSTAT    True
RM       True
Name: monotonic, dtype: bool

In [18]:
# Same validation, but asking for the detailed results instead of the summary.
# The constraints are derived from the Series used for training, rather than
# re-typed, so the check cannot drift from what the model was given.
# Unconstrained features (0) are dropped; values are cast to plain Python ints.
constraints_to_check = {
    feature: int(direction)
    for feature, direction in monotonic_constraints.items()
    if direction != 0
}
monotonic_constraint_check2 = xgb_validate.validate_monotonic_constraints_df(
    trees_df=tree_df,
    constraints=constraints_to_check,
    return_detailed_results=True,
)

In [19]:
# First 10 rows of the detailed results.
monotonic_constraint_check2.head(10)

Unnamed: 0,variable,tree,nodeid,monotonic_trend,monotonic,child_nodes_left_max_prediction,child_nodes_right_min_prediction,child_nodes_left,child_nodes_right
0,CHAS,63,2,1,True,0.104237,0.104237,"[5.0, 11.0, 12.0]","[6.0, 13.0, 14.0]"
1,CHAS,72,4,1,True,-0.006902,0.086598,[9.0],[10.0]
2,CRIM,1,4,-1,True,3.540831,2.162131,[9.0],[10.0]
3,CRIM,4,4,-1,True,1.194091,0.150783,[9.0],[10.0]
4,CRIM,6,2,-1,True,0.685518,-0.715208,"[5.0, 9.0, 10.0]","[6.0, 11.0, 12.0]"
5,CRIM,6,6,-1,True,-0.017087,-0.715208,[11.0],[12.0]
6,CRIM,7,1,-1,True,4.39068,0.326724,[3.0],[4.0]
7,CRIM,8,1,-1,True,3.512544,0.277715,[3.0],[4.0]
8,CRIM,11,4,-1,True,0.104852,-0.437945,[9.0],[10.0]
9,CRIM,13,4,-1,True,0.613249,0.061723,[9.0],[10.0]


In [20]:
# Last 10 rows of the detailed results.
monotonic_constraint_check2.tail(10)

Unnamed: 0,variable,tree,nodeid,monotonic_trend,monotonic,child_nodes_left_max_prediction,child_nodes_right_min_prediction,child_nodes_left,child_nodes_right
107,RM,52,4,1,True,-0.341639,0.015039,[9.0],[10.0]
108,RM,53,3,1,True,0.026007,0.605285,[7.0],[8.0]
109,RM,61,3,1,True,-0.02984,0.185373,[7.0],[8.0]
110,RM,70,6,1,True,-0.002281,0.397346,[11.0],[12.0]
111,RM,79,6,1,True,-0.004531,0.316898,[11.0],[12.0]
112,RM,81,5,1,True,-0.02655,0.14079,[11.0],[12.0]
113,RM,82,5,1,True,-0.097621,0.291219,[11.0],[12.0]
114,RM,86,3,1,True,-0.025446,0.102896,[7.0],[8.0]
115,RM,94,3,1,True,-0.027242,0.023875,[7.0],[8.0]
116,RM,98,6,1,True,-0.002891,0.264708,[11.0],[12.0]
