In [1]:
import warnings
# Silence every warning for the rest of the session (no category is given,
# so the filter is not restricted to one kind of warning).
# NOTE(review): a blanket filter also hides warnings that may matter for the
# results below; consider narrowing it to specific categories or modules.
warnings.filterwarnings('ignore') 
import csv
import numpy as np
import scipy.sparse as spr
import pandas as pd

# Load the whole dataset into memory as a list of per-row dicts keyed by the
# CSV header row.
# The file is opened with newline='' as the csv module documentation asks, so
# the reader itself decides how to treat line endings (including any that sit
# inside quoted fields).
# NOTE: csv.DictReader yields every field as a string; nothing is converted to
# a number at this point.
with open('boston.csv', 'rt', newline='') as f:
    data = list(csv.DictReader(f))

In [2]:
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Split every record into a feature dict (all columns except TARGET) and a
# float regression target taken from the TARGET column.
# NOTE(review): feature values are passed through exactly as they come from
# `data`; if they are strings, DictVectorizer would presumably one-hot encode
# each distinct value instead of treating columns as numeric — confirm this
# is intended.
_all_xs = [
    {name: value for name, value in record.items() if name != 'TARGET'}
    for record in data
]
_all_ys = np.array([float(record['TARGET']) for record in data])

# Shuffle features and targets together with a fixed seed, then hold out 25%
# of the shuffled records as a validation split (also seeded).
all_xs, all_ys = shuffle(_all_xs, _all_ys, random_state=0)
train_xs, valid_xs, train_ys, valid_ys = train_test_split(
    all_xs,
    all_ys,
    test_size=0.25,
    random_state=0,
)

print('{} items total'.format(len(all_xs)))

506 items total


In [3]:
# Suppress DeprecationWarning before the imports that follow.
# NOTE(review): the first cell already installs an unrestricted 'ignore'
# filter, so this call looks redundant — confirm and drop one of the two.
warnings.filterwarnings('ignore', category=DeprecationWarning)
from xgboost import XGBRegressor
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

class CSCTransformer:
    """Stateless pipeline step that converts its input with ``tocsc()``.

    It exposes the ``fit`` / ``transform`` pair so it can sit between the
    vectorizer and the regressor in the pipeline built below.
    NOTE(review): presumably needed because the regressor wants CSC input
    rather than what the vectorizer emits — confirm.
    """

    def fit(self, *args):
        """Learn nothing; ignore any positional arguments and return self."""
        return self

    def transform(self, xs):
        """Return ``xs`` converted through its ``tocsc()`` method."""
        converted = xs.tocsc()
        return converted
    
# Build the model: dict records -> DictVectorizer -> CSC conversion -> XGBoost
# regressor. `vec` and `clf` are kept as separate names because later cells
# pass them to the interpreter directly.
vec = DictVectorizer()
clf = XGBRegressor()
pipeline = make_pipeline(
    vec,
    CSCTransformer(),
    clf,
)

def evaluate(_clf):
    """Cross-validate ``_clf`` on the full dataset, then fit it on the train split.

    Runs 10-fold cross-validation over ``all_xs`` / ``all_ys``, prints the mean
    score together with twice the standard deviation of the fold scores, and
    finally fits ``_clf`` in place on ``train_xs`` / ``train_ys``.

    Returns nothing; the fitted estimator is the side effect.

    NOTE(review): no ``scoring`` argument is passed, so the printed number is
    whatever the estimator's default score is — for a regressor that is
    presumably R^2 rather than accuracy, despite the "Acc" label; confirm.
    """
    fold_scores = cross_val_score(_clf, all_xs, all_ys, cv=10)
    mean_score = np.mean(fold_scores)
    spread = 2 * np.std(fold_scores)
    print('Acc: {:.4f} ± {:.4f}'.format(mean_score, spread))
    # Leave the estimator fitted on the training split for the cells that follow.
    _clf.fit(train_xs, train_ys)
     
# Print the cross-validated score and leave `pipeline` fitted on the training
# split (evaluate() fits the estimator it is given as its last step).
evaluate(pipeline)

Acc: 0.6751 ± 0.2158


In [4]:
from xgboostinterpreter import xgboostinterpreter as xgbi

In [5]:
# Show feature weights for the regressor, using the vectorizer to name features.
# NOTE(review): the third argument (15) presumably limits how many rows are
# shown — the recorded table has 15 rows; confirm against xgboostinterpreter.
xgbi.weight(clf, vec, 15)

+-----------+---------+-------------+
|    Weight | Feature |       Value |
+-----------+---------+-------------+
|  0.172535 | INDUS   | 18.10000038 |
|  0.122612 | MV      |          50 |
| 0.0817789 | PT      | 20.20000076 |
| 0.0477658 | ZN      |           0 |
| 0.0384741 | RAD     |           4 |
| 0.0269798 | INDUS   | 6.199999809 |
| 0.0210037 | ZN      |          20 |
| 0.0177974 | DIS     | 3.495199919 |
| 0.0174249 | NOX     | 0.504000008 |
| 0.0172703 | ZN      |        12.5 |
| 0.0172297 | NOX     | 0.870999992 |
|  0.016497 | PT      |          21 |
| 0.0163267 | INDUS   | 2.460000038 |
| 0.0147658 | ZN      |          95 |
| 0.0145028 | NOX     | 0.693000019 |
+-----------+---------+-------------+


In [6]:
# Explain the prediction for the first validation record (valid_xs[0]); the
# recorded output lists a <BIAS> row plus per-feature contributions.
# NOTE(review): the argument order here is (model, record, vectorizer), unlike
# xgbi.weight(model, vectorizer, n) — presumably the library's API; confirm.
xgbi.predict(clf, valid_xs[0], vec)

Answer:  25.6487
+--------------+---------+-------------+
| Contribution | Feature |       Value |
+--------------+---------+-------------+
|    21.695153 | <BIAS>  |             |
|     2.556438 | ZN      |           0 |
|     1.033927 | INDUS   | 18.10000038 |
|     0.423321 | PT      | 20.20000076 |
|     0.241702 | ZN      |        12.5 |
|     0.208819 | RAD     |           4 |
|     0.142460 | PT      |          21 |
|     0.139414 | NOX     | 0.870999992 |
|     0.073743 | NOX     | 0.693000019 |
|     0.068698 | NOX     |  0.74000001 |
|     0.056680 | INDUS   | 21.88999939 |
|     0.056569 | INDUS   | 5.130000114 |
|     0.044118 | NOX     | 0.699999988 |
|     0.043046 | INDUS   | 27.73999977 |
|     0.042861 | NOX     |  0.67900002 |
|     0.035584 | RAD     |           6 |
|     0.013775 | PT      | 19.10000038 |
|     0.005259 | DIS     | 7.954899788 |
|     0.004922 | LSTAT   | 30.80999947 |
+--------------+---------+-------------+
