This experiment uses the tennis.txt dataset, which contains 14 samples. Each sample contains weather-related features and a label indicating whether the weather is suitable for playing tennis.

Step 1: Import dependencies

In [2]:
import pandas as pd
import numpy as np
from sklearn import tree
import pydotplus

Step 2: Define the function for generating a decision tree.

In [4]:
#Generate a decision tree

def createTree(trainingData):
    """Fit an entropy-criterion decision tree on a labelled table.

    Every column of ``trainingData`` except the last is used as the feature
    matrix; the last column supplies the class labels.

    Returns the fitted ``DecisionTreeClassifier``.
    """
    features, target = trainingData.iloc[:, :-1], trainingData.iloc[:, -1]
    classifier = tree.DecisionTreeClassifier(criterion="entropy")
    # fit() returns the estimator itself, so the fitted model is handed back directly.
    return classifier.fit(features, target)

Step 3: Define the function for saving the generated tree diagram.

In [6]:
def showtree2pdf(trainedTree, filename):
    """Write a diagram of ``trainedTree`` to ``filename`` as a PDF.

    The tree is exported as Graphviz DOT source, parsed by pydotplus, and
    rendered to the given path.
    """
    # out_file=None makes export_graphviz return the DOT source as a string
    # instead of writing it to a file.
    dot_source = tree.export_graphviz(trainedTree, out_file=None)
    pydotplus.graph_from_dot_data(dot_source).write_pdf(filename)

Step 4: Define the function for generating vectorized data.

In this function, pd.Categorical(values).codes returns the integer code of the category each original value belongs to, which converts the categorical information into numerical information.

In [8]:
def data2vectoc(data):
    """Return a copy of ``data`` with every feature column integer-encoded.

    All columns except the last (the label column) are replaced by their
    ``pd.Categorical`` codes. The last column is left as-is.

    The encoding is applied to a copy, so the caller's DataFrame keeps its
    original categorical values instead of being overwritten in place.

    Args:
        data: DataFrame whose last column holds the labels.

    Returns:
        A new DataFrame with the same shape and column labels as ``data``.
    """
    encoded = data.copy()
    for name in encoded.columns[:-1]:
        # Codes index into the column's categories, which pandas keeps in
        # sorted order when the values are sortable.
        encoded[name] = pd.Categorical(encoded[name]).codes
    return encoded

Step 5: Invoke the function for prediction 

In [10]:
# Load the training table: tab-separated (the read_table default), no header row.
data = pd.read_table("../Practicals-Dataset/tennis.txt", header=None)
# Encode the feature columns as integer category codes.
trainingvec = data2vectoc(data)
# Fit the classifier on the encoded table, then save its diagram as a PDF.
decisionTree = createTree(trainingvec)
showtree2pdf(decisionTree, "tennis.pdf")

Predict a new sample

In [19]:
# One encoded sample, one code per feature column. Per the original author:
# weather sunny, temperature low, humidity high, wind strong.
# NOTE(review): the codes come from pd.Categorical in data2vectoc, so which
# category each integer stands for depends on the dataset's values - confirm.
testVec = [0, 0, 1, 1]
# predict() expects a 2-D (n_samples, n_features) array, hence the extra list level.
print(decisionTree.predict(np.asarray([testVec])))

['Y']
