
# Decision Trees with visualisations while building the tree

Decision Trees with visualisation while building trees.

This example covers the use of 
  
  1. Classification and 
  2. Regression Tree 
  
from the ***spkit*** library, with different verbosity modes while training, and plotting the resulting decision tree after training. We use two different datasets, Iris and Breast Cancer, for classification and the Boston Housing price dataset for regression. 


In [None]:
# Import libraries: NumPy for numerics, Matplotlib for the tree plots,
# and spkit, which provides the decision-tree implementations used below.
import numpy as np
import matplotlib.pyplot as plt
import spkit
print('spkit version :', spkit.__version__)

# Fix NumPy's global random seed so that repeated runs give the same results
# (the train/test splits below are called without an explicit random_state).
np.random.seed(11) 

# Import the Classification and Regression Tree models from spkit
from spkit.ml import ClassificationTree, RegressionTree

# Import the example datasets and the train-test split helper from sklearn
# (or use your own dataset instead)
from sklearn import datasets
from sklearn.model_selection import train_test_split

Classification Tree



In [None]:
# 1. Iris dataset

# Load the data and hold out 30% of the samples for testing
data = datasets.load_iris()
X, y = data.data, data.target

# Feature names are optional; they are only passed to fit() further down
feature_names = data.feature_names

Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)

# Report the shapes of the full, training and testing arrays
print('Shapes', *(arr.shape for arr in (X, y, Xt, yt, Xs, ys)))

# Fitting a model (displaying the tree building) with each verbosity mode,
# one fresh tree per mode:
#   1) verbose=0 : silence mode
#   2) verbose=1 : progress bar
#   3) verbose=2 : printing tree info
#   4) verbose=3 : printing branches only
#   5) verbose=4 : plotting the tree while it is being built
for verbose in (0, 1, 2, 3, 4):
    model = ClassificationTree()
    model.fit(Xt, yt, verbose=verbose, feature_names=feature_names)


# `model` is now the tree from the last fit (verbose=4); plot it twice with
# different plotting options.
plt.figure(figsize=(10, 6))
model.plotTree(show=True, scale=False)


plt.figure(figsize=(8, 6))
model.plotTree(DiffBranchColor=False)


# Predicting

# Hard class predictions for the training and testing splits
ytp = model.predict(Xt)
ysp = model.predict(Xs)

# Iris has three classes (0, 1, 2), so the binary cross-entropy
#   -mean(y*log(p1) + (1-y)*log(1-p1))
# is not valid here: (1 - y) becomes -1 for class 2 and only the class-1
# probability is ever used. Use the multiclass log loss instead, i.e. the mean
# negative log-probability that the model assigns to each sample's true class.
# NOTE(review): assumes column k of predict_proba corresponds to class label k
# (the same convention the previous `[:,1]` indexing relied on) - confirm.
ytP = np.asarray(model.predict_proba(Xt))
ysP = np.asarray(model.predict_proba(Xs))

# Probability assigned to the true class of every sample
ytpr = ytP[np.arange(len(yt)), yt]
yspr = ysP[np.arange(len(ys)), ys]

print('Depth of trained Tree ', model.getTreeDepth())
print('Accuracy')
print('- Training : ',np.mean(ytp==yt))
print('- Testing  : ',np.mean(ysp==ys))
print('Logloss')
# 1e-10 keeps log() finite when a true class received probability 0
Trloss = -np.mean(np.log(ytpr+1e-10))
Tsloss = -np.mean(np.log(yspr+1e-10))
print('- Training : ',Trloss)
print('- Testing  : ',Tsloss)


# Iris data with a smaller tree (depth limited to 3)


model = ClassificationTree(max_depth=3)
model.fit(Xt,yt,verbose=1,feature_names=feature_names)
plt.figure(figsize=(5,5))
model.plotTree(show=True,DiffBranchColor=True)

# Hard class predictions for the training and testing splits
ytp = model.predict(Xt)
ysp = model.predict(Xs)

# Iris has three classes (0, 1, 2), so the binary cross-entropy
#   -mean(y*log(p1) + (1-y)*log(1-p1))
# is not valid here: (1 - y) becomes -1 for class 2 and only the class-1
# probability is ever used. Use the multiclass log loss instead, i.e. the mean
# negative log-probability that the model assigns to each sample's true class.
# NOTE(review): assumes column k of predict_proba corresponds to class label k
# (the same convention the previous `[:,1]` indexing relied on) - confirm.
ytP = np.asarray(model.predict_proba(Xt))
ysP = np.asarray(model.predict_proba(Xs))

# Probability assigned to the true class of every sample
ytpr = ytP[np.arange(len(yt)), yt]
yspr = ysP[np.arange(len(ys)), ys]

print('Depth of trained Tree ', model.getTreeDepth())
print('Accuracy')
print('- Training : ',np.mean(ytp==yt))
print('- Testing  : ',np.mean(ysp==ys))
print('Logloss')
# 1e-10 keeps log() finite when a true class received probability 0
Trloss = -np.mean(np.log(ytpr+1e-10))
Tsloss = -np.mean(np.log(yspr+1e-10))
print('- Training : ',Trloss)
print('- Testing  : ',Tsloss)

Breast Cancer data 



In [None]:
# Load the Breast Cancer data and hold out 30% of the samples for testing
data = datasets.load_breast_cancer()
X, y = data.data, data.target

# Feature names are optional; they are only passed to fit() further down
feature_names = data.feature_names

Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)

# Report the shapes of the full, training and testing arrays
print(*(arr.shape for arr in (X, y, Xt, yt, Xs, ys)))


# Fitting model while displaying the details of the tree in process (verbose=4)

# Two runs, each on a fresh tree:
#   randomBranch=False : while building, first choose the True branch and
#                        then the False branch
#   randomBranch=True  : randomly select the True or False branch
for random_branch in (False, True):
    model = ClassificationTree()
    model.fit(Xt, yt, verbose=4, feature_names=feature_names,
              randomBranch=random_branch)
    plt.show()


# Resulting tree (from the last fit above, i.e. randomBranch=True)

plt.figure(figsize=(10, 6))
model.plotTree(show=True, DiffBranchColor=True, scale=False)
plt.show()


# Fitting model while displaying the progress only (verbose=1)


# %matplotlib inline
model = ClassificationTree()
model.fit(Xt, yt, verbose=1, feature_names=feature_names)

plt.figure(figsize=(6, 6))
model.plotTree()
plt.show()

# Predicting: hard labels first, then column 1 of the predicted
# probabilities (used below as the probability of class 1)
ytp, ysp = model.predict(Xt), model.predict(Xs)

ytpr = model.predict_proba(Xt)[:, 1]
yspr = model.predict_proba(Xs)[:, 1]


def binary_logloss(labels, prob_pos, eps=1e-10):
    """Return the mean binary cross-entropy of `prob_pos` against `labels`.

    `eps` is added inside each log() so a probability of exactly 0 or 1
    does not produce -inf.
    """
    pos_term = labels * np.log(prob_pos + eps)
    neg_term = (1 - labels) * np.log(1 - prob_pos + eps)
    return -np.mean(pos_term + neg_term)


print('Depth of trained Tree ', model.getTreeDepth())
print('Accuracy')
print('- Training : ', np.mean(ytp == yt))
print('- Testing  : ', np.mean(ysp == ys))
print('Logloss')
Trloss = binary_logloss(yt, ytpr)
Tsloss = binary_logloss(ys, yspr)
print('- Training : ', Trloss)
print('- Testing  : ', Tsloss)