In [1]:
#!pip install graphviz
#!pip install pydotplus

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree
from sklearn import preprocessing
import graphviz
import pydotplus
import collections

In [2]:
# Load the golf dataset from the repository-relative "datasets" folder.
# A forward slash is used as the separator: it works on every OS, whereas a
# backslash is Windows-only and "\g" is an invalid escape sequence inside a
# regular (non-raw) string literal.
golf_data = pd.read_csv('datasets/golf_dataset.csv')

In [3]:
# Report the number of rows and the (rows, columns) shape of the loaded frame.
print('Dataset length:', golf_data.shape[0])
print('Data shape:', golf_data.shape)

Dataset length: 14
Data shape: (14, 7)


In [4]:
# Preview the first five rows (the default for head) to check the columns.
golf_data.head(n=5)

Unnamed: 0,outlook,temp,temperature,hum,humidity,windy,play
0,overcast,83,hot,86,high,False,yes
1,overcast,64,cool,65,normal,True,yes
2,overcast,72,mild,90,high,True,yes
3,overcast,81,hot,75,normal,False,yes
4,rainy,70,mild,96,high,False,yes


In [5]:
# Integer-encode the categorical columns and the target.  Codes follow the
# sorted order of each column's distinct values (e.g. overcast=0, rainy=1,
# sunny=2 for "outlook").
# NOTE: the same `le` instance is re-fitted for every column, so afterwards it
# only holds the classes of the column it was fitted on last.
le = preprocessing.LabelEncoder()
le_golf_data = (
    golf_data
    .loc[:, ["outlook", "temperature", "humidity", "windy", "play"]]
    .apply(le.fit_transform)
)
le_golf_data

Unnamed: 0,outlook,temperature,humidity,windy,play
0,0,1,0,0,1
1,0,0,1,1,1
2,0,2,0,1,1
3,0,1,1,0,1
4,1,2,0,0,1
5,1,0,1,0,1
6,1,0,1,1,0
7,1,2,1,0,1
8,1,2,0,1,0
9,2,1,0,0,0


In [6]:
# Names of the model input columns and of the (single) output column.
features = ['outlook', 'temperature', 'humidity', 'windy']
target = ['play']

# Both selections use a list of names, so x and y are DataFrames
# (y is a one-column frame, not a Series).
x = le_golf_data.loc[:, features]
y = le_golf_data.loc[:, target]

In [7]:
# Display the label-encoded feature matrix used to train the trees.
x

Unnamed: 0,outlook,temperature,humidity,windy
0,0,1,0,0
1,0,0,1,1
2,0,2,0,1
3,0,1,1,0
4,1,2,0,0
5,1,0,1,0
6,1,0,1,1
7,1,2,1,0
8,1,2,0,1
9,2,1,0,0


In [8]:
# Display the label-encoded target column (one-column DataFrame).
y

Unnamed: 0,play
0,1
1,1
2,1
3,1
4,1
5,1
6,0
7,1
8,0
9,0


In [9]:
# Dedicated encoder for the target column, kept so that numeric predictions
# can be mapped back to their original labels with inverse_transform.
le_play = preprocessing.LabelEncoder().fit(golf_data["play"])
list(le_play.classes_)

['no', 'yes']

In [10]:
# Decision tree that scores candidate splits with Gini impurity.
clf_gini = DecisionTreeClassifier(
    criterion='gini',
    max_depth=5,
    min_samples_leaf=1,
    random_state=100,
)
# Kept as the final expression so the notebook echoes the fitted estimator.
clf_gini.fit(x, y)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=5, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=100, splitter='best')

In [11]:
# Encoded sample: outlook=2 (sunny), temperature=0 (cool), humidity=0 (high),
# windy=1 (True).
# The sample is wrapped in a DataFrame carrying the training column names so
# predict() receives the same named features as fit(); recent scikit-learn
# versions warn when a model fitted with feature names is given unnamed input.
clf_gini.predict(pd.DataFrame([[2, 0, 0, 1]], columns=features))

array([0])

In [12]:
# Same sample as above (sunny, cool, high humidity, windy), but the numeric
# prediction is decoded back to its original "yes"/"no" label.
# The DataFrame wrapper supplies the training column names so that predict()
# sees the same named features as fit().
print(le_play.inverse_transform(
    clf_gini.predict(pd.DataFrame([[2, 0, 0, 1]], columns=features))
))

['no']


In [13]:
# Encoded sample: outlook=2 (sunny), temperature=1 (hot), humidity=1 (normal),
# windy=0 (False).
# The DataFrame wrapper supplies the training column names so that predict()
# sees the same named features as fit().
clf_gini.predict(pd.DataFrame([[2, 1, 1, 0]], columns=features))

array([1])

In [14]:
# Decision tree that scores candidate splits with information gain (entropy);
# all other hyper-parameters match the Gini tree for a like-for-like comparison.
clf_entropy = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=5,
    min_samples_leaf=1,
    random_state=100,
)
# Kept as the final expression so the notebook echoes the fitted estimator.
clf_entropy.fit(x, y)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=5, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=100, splitter='best')

In [15]:
# Encoded sample: outlook=2 (sunny), temperature=0 (cool), humidity=0 (high),
# windy=1 (True).
# The DataFrame wrapper supplies the training column names so that predict()
# sees the same named features as fit(); recent scikit-learn versions warn
# when a model fitted with feature names is given unnamed input.
clf_entropy.predict(pd.DataFrame([[2, 0, 0, 1]], columns=features))

array([0])

In [16]:
# Encoded sample: outlook=2 (sunny), temperature=1 (hot), humidity=1 (normal),
# windy=0 (False).
# The DataFrame wrapper supplies the training column names so that predict()
# sees the same named features as fit().
clf_entropy.predict(pd.DataFrame([[2, 1, 1, 0]], columns=features))

array([1])

In [17]:
# Sample (sunny, cool, high humidity, windy) classified by the entropy tree,
# with the numeric prediction decoded back to its original "yes"/"no" label.
# The DataFrame wrapper supplies the training column names so that predict()
# sees the same named features as fit().
print(le_play.inverse_transform(
    clf_entropy.predict(pd.DataFrame([[2, 0, 0, 1]], columns=features))
))

['no']


In [18]:
!pip install graphviz
!pip install pydotplus



In [19]:
# Visualize the entropy tree and save it as a PNG.
# graphviz, pydotplus and collections are imported once in the notebook's
# first cell, so they are not imported again here.
data_feature_names = features
dot_data = tree.export_graphviz(clf_entropy,
                                feature_names=data_feature_names,
                                out_file=None,
                                filled=True,
                                rounded=True)
graph = pydotplus.graph_from_dot_data(dot_data)
colors = ('turquoise', 'orange')

# Map every parent node id to the integer ids of its children.
edges = collections.defaultdict(list)
for edge in graph.get_edge_list():
    edges[edge.get_source()].append(int(edge.get_destination()))

# Recolor the children of each split: the lower-numbered child gets the first
# color and the higher-numbered child the second.  zip() pairs colors with
# children directly instead of indexing a hard-coded range(2).
for children in edges.values():
    children.sort()
    for color, child_id in zip(colors, children):
        dest = graph.get_node(str(child_id))[0]
        dest.set_fillcolor(color)

# Rendering to PNG invokes the Graphviz executables, which have to be
# installed on the system separately from the Python packages.
graph.write_png('golf_tree_01.png')

InvocationException: GraphViz's executables not found

In [30]:
# Reload the raw data for the one-hot-encoded variant of the model.
# A forward slash is used as the separator: it works on every OS, whereas a
# backslash is Windows-only and "\g" is an invalid escape sequence inside a
# regular (non-raw) string literal.
new_golf = pd.read_csv('datasets/golf_dataset.csv')

# One-hot encode the string-valued features; the boolean "windy" column is
# passed through as a single column.
ohd_golf = pd.get_dummies(new_golf[["outlook", "temperature", "humidity", "windy"]])

In [31]:
# Fit a tree on the one-hot features against the raw "yes"/"no" labels.
# random_state is fixed (same seed as the other classifiers in this notebook)
# because scikit-learn permutes the features at every split, so ties between
# equally good splits would otherwise be broken differently from run to run.
clf = tree.DecisionTreeClassifier(random_state=100)
# fit() returns the estimator itself, so clf_train refers to the same object as clf.
clf_train = clf.fit(ohd_golf, new_golf["play"])
ohd_golf

Unnamed: 0,windy,outlook_overcast,outlook_rainy,outlook_sunny,temperature_cool,temperature_hot,temperature_mild,humidity_high,humidity_normal
0,False,1,0,0,0,1,0,1,0
1,True,1,0,0,1,0,0,0,1
2,True,1,0,0,0,0,1,1,0
3,False,1,0,0,0,1,0,0,1
4,False,0,1,0,0,0,1,1,0
5,False,0,1,0,1,0,0,0,1
6,True,0,1,0,1,0,0,0,1
7,False,0,1,0,0,0,1,0,1
8,True,0,1,0,0,0,1,1,0
9,False,0,0,1,0,1,0,1,0


In [32]:
# Export the one-hot tree to DOT text and render it to a PNG file.
# class_names are given in ascending class order, so 'Not_Play' labels the
# "no" class and 'Play' labels the "yes" class.
dot_data = tree.export_graphviz(
    clf_train,
    out_file=None,
    feature_names=ohd_golf.columns.tolist(),
    class_names=['Not_Play', 'Play'],
    filled=True,
    rounded=True,
)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_png('golf_tree_02.png')

NameError: name 'Image' is not defined