In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.decomposition import PCA
from datetime import datetime
import pickle
import json
import os
import sys
# Resolve the parent of the current working directory to an absolute path and
# append it to the import search path once (presumably so the `util` imports
# that follow can be resolved - confirm against the repository layout).
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from util import jupyter_util as ju
from util.model_util import ModelWrapper, REPORT_FILE, RSTATE, N_JOBS, MAX_ITER, LABEL_COL

# Value compared against the report's `description` column when selecting the
# report row(s) that are handed to ModelWrapper in the next cell.
DESCRIPTION = 'diff-ohe-1985'

In [2]:
# Read the model report CSV, then build a ModelWrapper from the row(s) whose
# model_name is 'DecisionTreeClassifier' and whose description equals DESCRIPTION.
report = pd.read_csv(REPORT_FILE)
mw = ModelWrapper.get_model_wrapper_from_report(
    report.loc[
        (report['model_name'] == 'DecisionTreeClassifier')
        & (report['description'] == DESCRIPTION)
    ]
)

In [3]:
# IPython help lookup: shows the type, file and docstring of the wrapped model's
# `tree_` attribute. NOTE(review): this is an interactive development aid -
# consider removing it from the finished notebook.
? mw.model.tree_

[0;31mType:[0m        Tree
[0;31mString form:[0m <sklearn.tree._tree.Tree object at 0x7fd5f8a4a100>
[0;31mFile:[0m        //anaconda3/envs/capstone2/lib/python3.7/site-packages/sklearn/tree/_tree.cpython-37m-darwin.so
[0;31mDocstring:[0m  
Array-based representation of a binary decision tree.

The binary tree is represented as a number of parallel arrays. The i-th
element of each array holds information about the node `i`. Node 0 is the
tree's root. You can find a detailed description of all arrays in
`_tree.pxd`. NOTE: Some of the arrays only apply to either leaves or split
nodes, resp. In this case the values of nodes of the other type are
arbitrary!

Attributes
----------
node_count : int
    The number of nodes (internal nodes + leaves) in the tree.

capacity : int
    The current capacity (i.e., size) of the arrays, which is at least as
    great as `node_count`.

max_depth : int
    The depth of the tree, i.e. the maximum depth of its leaves.

children_left : array of int

In [4]:
# Number of nodes (internal nodes + leaves) in the tree.
mw.model.tree_.node_count

6405

In [5]:
# Current capacity (size) of the tree's parallel arrays; at least as great as node_count.
mw.model.tree_.capacity

6405

In [11]:
# Integer array with one entry per node (the i-th element describes node i).
# NOTE(review): presumably the id of each node's left child, with -1 where a
# node has no child - confirm against sklearn's `_tree.pxd`.
mw.model.tree_.children_left

array([   1,    2,    3, ..., 6403,   -1,   -1])

In [7]:
# Per-node array, parallel to children_left.
# NOTE(review): presumably the id of each node's right child, with -1 where a
# node has no child - confirm against sklearn's `_tree.pxd`.
mw.model.tree_.children_right

array([2350, 2225, 1614, ..., 6404,   -1,   -1])

In [12]:
# Per-node `feature` array converted to a plain Python list, so every entry is
# displayed rather than numpy's abbreviated repr.
# NOTE(review): the -2 entries presumably mark leaf nodes (no split feature) -
# confirm against the sklearn docs. This prints one line per node; consider
# slicing (e.g. `[:20]`) before sharing the notebook.
mw.model.tree_.feature.tolist()

[5,
 3,
 5,
 5,
 1,
 -2,
 3,
 4178,
 0,
 -2,
 239,
 2899,
 3192,
 306,
 1930,
 2216,
 3230,
 -2,
 96,
 -2,
 -2,
 1,
 -2,
 -2,
 375,
 -2,
 -2,
 211,
 -2,
 -2,
 -2,
 -2,
 4,
 -2,
 -2,
 8,
 -2,
 -2,
 0,
 -2,
 4385,
 -2,
 -2,
 4,
 5,
 3,
 5,
 5,
 5,
 5,
 5,
 5,
 0,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 0,
 178,
 1607,
 274,
 3618,
 231,
 5,
 5,
 324,
 1675,
 236,
 1,
 3227,
 1472,
 3252,
 4446,
 1957,
 1097,
 3745,
 966,
 4284,
 3065,
 258,
 4269,
 1982,
 175,
 2975,
 224,
 1518,
 1476,
 -2,
 336,
 -2,
 -2,
 3,
 -2,
 -2,
 3,
 -2,
 -2,
 3,
 -2,
 -2,
 0,
 -2,
 -2,
 2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 96,
 134,
 3,
 1913,
 3929,
 3344,
 -2,
 -2,
 -2,
 -2,
 2,
 53,
 165,
 144,
 52,
 4267,
 779,
 4018,
 -2,
 50,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 4,
 -2,
 -2,
 -2,
 3,
 -2,
 -2,
 1430,
 -2,
 -2,
 -2,
 0,
 -2,
 -2,
 8,
 -2,
 3,
 -2,
 -2,
 -2,
 -2,
 5,
 -2,
 -2,
 -2,
 -2,
 -2,
 -2,
 5,
 5,
 5,
 5,
 8,
 5,
 10,
 5,
 5,
 438,
 -2,
 -2,
 -2,
 -2,
 584,
 5,
 7,
 3,
 -

In [13]:
# Per-node `impurity` array converted to a plain Python list, so every entry is
# displayed rather than numpy's abbreviated repr.
# NOTE(review): presumably the split-criterion impurity at each node, with 0.0
# at pure nodes - confirm against the sklearn docs. This prints one line per
# node; consider slicing (e.g. `[:20]`) before sharing the notebook.
mw.model.tree_.impurity.tolist()

[0.49999999919839755,
 0.4344663229177822,
 0.4722340990612022,
 0.3921451551278823,
 0.09771521404419758,
 0.0,
 0.04426794165390724,
 0.019898487411934007,
 0.015713300171407485,
 0.0,
 0.012893439908845616,
 0.010064444184570198,
 0.008639146249076579,
 0.0072097109623210365,
 0.005780298256668814,
 0.002911201965167587,
 0.0014598532356612237,
 0.0,
 0.31999999999999995,
 0.0,
 0.0,
 0.4444444444444444,
 0.0,
 0.0,
 0.375,
 0.0,
 0.0,
 0.5,
 0.0,
 0.0,
 0.0,
 0.0,
 0.4444444444444444,
 0.0,
 0.0,
 0.375,
 0.0,
 0.0,
 0.18000000000000005,
 0.0,
 0.4444444444444444,
 0.0,
 0.0,
 0.3417951358782282,
 0.3023773032299465,
 0.2436068996169879,
 0.35457624362901907,
 0.3716840054106313,
 0.35038027086903767,
 0.3719731420787268,
 0.3467933025143237,
 0.37169929318847805,
 0.3434687138376892,
 0.29946873727736356,
 0.3238566685177472,
 0.28860680393564786,
 0.3175980248879049,
 0.28404285856306377,
 0.3226213242221171,
 0.28199842666511277,
 0.32455101684051046,
 0.27293237205851517,
 0.17