In [1]:
# [Scikit-learn - Tutorial](https://scikit-learn.org/dev/modules/tree.html)
# [Medium - Decision Tree In Python](https://towardsdatascience.com/decision-tree-in-python-b433ae57fb93)
# import libraries.
import pandas as pd
import numpy as np
import sklearn as sk
from sklearn import *
import graphviz

In [2]:
# Load the iris dataset through scikit-learn (`sk` is the sklearn package).
# Later cells read four attributes from the returned object:
# `data`, `feature_names`, `target` and `target_names`.
iris = sk.datasets.load_iris()

In [3]:
# Build the feature table: the measurements in `iris.data`, with columns
# named after `iris.feature_names`, then preview the first five rows.
input_dataframe = pd.DataFrame(data=iris.data, columns=iris.feature_names)
input_dataframe.head(n=5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [4]:
# Build the label table in two steps:
#   1. decode the integer codes in `iris.target` into the names in
#      `iris.target_names`;
#   2. expand that categorical into one indicator column per category.
output_categorical = pd.Categorical.from_codes(
    codes=iris.target,
    categories=iris.target_names,
)
output_dataframe = pd.get_dummies(data=output_categorical)

In [5]:
# define functions for converting pandas dataframes into the numpy arrays passed to sklearn.
def get_sk_decision_tree_input(dataframe):
    """Return the feature dataframe as a NumPy array.

    Args:
        dataframe: pandas DataFrame holding the input features.

    Returns:
        The array produced by ``dataframe.to_numpy()``: one row per
        dataframe row, one column per dataframe column.
    """
    feature_array = dataframe.to_numpy()
    return feature_array

def get_sk_decision_tree_output(dataframe):
    """Collapse a one-hot label table into a 1-D array of class indices.

    Args:
        dataframe: pandas DataFrame with one indicator column per class.

    Returns:
        NumPy array with one entry per row: the position of that row's
        largest value (for a one-hot row, the position of the hot column).
        If several columns tie for the maximum, the first one wins.
    """
    one_hot = np.array(dataframe)
    return np.argmax(one_hot, axis=1)

In [6]:
# Split the feature and label tables into train and test subsets.
# TEST_SIZE is the fraction held out for testing; the fixed random_state
# keeps the shuffle, and therefore the split, identical on every run.
TEST_SIZE = 0.25
(
    train_input_dataframe,
    test_input_dataframe,
    train_output_dataframe,
    test_output_dataframe,
) = sk.model_selection.train_test_split(
    input_dataframe,
    output_dataframe,
    test_size=TEST_SIZE,
    random_state=1,
)

# Convert each subset into the array form used for fitting and evaluation.
train_input_array, test_input_array = (
    get_sk_decision_tree_input(train_input_dataframe),
    get_sk_decision_tree_input(test_input_dataframe),
)
train_output_array, test_output_array = (
    get_sk_decision_tree_output(train_output_dataframe),
    get_sk_decision_tree_output(test_output_dataframe),
)

In [7]:
# Train the decision tree classifier.
# scikit-learn randomly permutes the candidate features at every split, so
# without a fixed seed two runs can grow different trees whenever several
# splits are equally good. Pinning random_state makes the fitted tree, and
# everything derived from it, reproducible after a kernel restart.
TREE_RANDOM_STATE = 1
decision_tree_classifier = sk.tree.DecisionTreeClassifier(random_state=TREE_RANDOM_STATE)
# fit() returns the estimator itself, so `decision_tree` and
# `decision_tree_classifier` refer to the same fitted object.
decision_tree = decision_tree_classifier.fit(train_input_array, train_output_array)

In [8]:
# Render the fitted decision tree to a PDF via Graphviz.
# NOTE(review): 'Ivis' looks like a typo for 'Iris'. It is left unchanged here
# because this string determines the names of the files written to disk.
OUTPUT_FILE_NAME = 'Ivis Decision Tree'
graphviz_dot = sk.tree.export_graphviz(
    decision_tree,
    out_file=None,  # return the DOT source as a string instead of writing a file
    feature_names=iris.feature_names,
    # Label every node with its majority class. The names must be listed in
    # ascending order of the integer targets, which is the order of
    # iris.target_names.
    class_names=list(iris.target_names),
)
graph = graphviz.Source(graphviz_dot)
# render() writes the files and returns the path of the rendered output,
# which the notebook displays as this cell's result.
graph.render(OUTPUT_FILE_NAME)

'Ivis Decision Tree.pdf'

In [9]:
# Classify the held-out test samples with the fitted tree.
predicted_output_array = decision_tree_classifier.predict(test_input_array)

# Compare predictions with the true labels as a confusion matrix:
# rows are true classes, columns are predicted classes, so correct
# predictions sit on the diagonal and every off-diagonal count is an error.
sk.metrics.confusion_matrix(y_true=test_output_array, y_pred=predicted_output_array)

array([[13,  0,  0],
       [ 0, 15,  1],
       [ 0,  0,  9]], dtype=int64)