In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

In [None]:
# Read the zoo dataset into a DataFrame (the path is relative to the working directory).
zoo = pd.read_csv(filepath_or_buffer='zoo.csv')
# Preview the first five rows as the cell output.
zoo.head(n=5)

In [None]:
# Training features: the first 16 columns of the DataFrame.
# NOTE(review): assumes those 16 columns are exactly the attributes listed in
# `feature_cols` (i.e. no leading name/id column) -- confirm against zoo.csv.
X = zoo.iloc[:, :16].to_numpy()
# Target: the last column, taken as a 1-D array of shape (n_samples,) rather than an
# (n_samples, 1) column vector, which is the target shape scikit-learn expects.
y = zoo.iloc[:, -1].to_numpy()
print(X.shape)

In [None]:
# Names for the 16 feature columns; later cells pass them as `feature_names`
# when exporting the fitted tree.
feature_cols = (
    'hair', 'feathers', 'eggs', 'milk',
    'airborne', 'aquatic', 'predator', 'toothed',
    'backbone', 'breathes', 'venomous', 'fins',
    'legs', 'tail', 'domestic', 'catsize',
)

# Class distribution of the target: first row holds the distinct labels,
# second row holds how many samples carry each label.
unique_elements, counts_elements = np.unique(y, return_counts=True)
print(np.asarray((unique_elements, counts_elements)))

### Decision Tree
> class sklearn.tree.DecisionTreeClassifier(**criterion**='gini', **splitter**='best', **max_depth**=None, **min_samples_split**=2, **min_samples_leaf**=1, **min_weight_fraction_leaf**=0.0, **max_features**=None, **random_state**=None, **max_leaf_nodes**=None, **min_impurity_decrease**=0.0, **min_impurity_split**=None, **class_weight**=None, **presort**='deprecated', **ccp_alpha**=0.0)

In [None]:
# CART algorithm
# max_depth: prunes the tree. The root node is not counted, since it holds all of the
#   data and is not a branch. If None, then nodes are expanded until all leaves are pure
#   or until all leaves contain less than min_samples_split samples.
# min_samples_leaf: minimum number of samples in the terminal nodes (leaves).
#
# NOTE: assigning to `tree` rebinds the name that the imports cell bound to the
# `sklearn.tree` module; from here on `tree` refers to the classifier.
tree = DecisionTreeClassifier(
    criterion='gini',
    max_depth=3,
    min_samples_leaf=5,
    random_state=100,
)
tree.fit(X, y)

# Cross-validation: predictions for every sample using 10 folds.
predictions = cross_val_predict(estimator=tree, X=X, y=y, cv=10)

In [None]:
# Accuracy of the cross-validated predictions, expressed as a percentage.
100 * accuracy_score(y_true=y, y_pred=predictions)

In [None]:
# Confusion matrix of the cross-validated predictions: rows are the true classes,
# columns are the predicted classes.
# The class order is derived from the data (sorted unique labels) and passed explicitly,
# so the tick labels always line up with the matrix rows/columns instead of relying on
# a hand-maintained list.
class_labels = np.unique(y)
cf = confusion_matrix(y, predictions, labels=class_labels)
tick_labels = [str(label) for label in class_labels]
lbl1 = lbl2 = tick_labels  # names kept in case another cell still refers to them

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cf, annot=True, cmap="Greens", fmt="d",
            xticklabels=tick_labels, yticklabels=tick_labels, ax=ax)
ax.set(xlabel="Predicted class", ylabel="True class",
       title="Confusion matrix (cross-validated predictions)")
plt.show()

## 4 - Avaliação
![Acurácia](https://cdn.discordapp.com/attachments/666438977364164630/671551093032878110/acuracia.png)
![Precisão](https://cdn.discordapp.com/attachments/666438977364164630/671551101627006991/precisao.png)
![Recall](https://cdn.discordapp.com/attachments/666438977364164630/671551103946457089/recall.png)
![F1](https://cdn.discordapp.com/attachments/666438977364164630/671551099567734804/f1.png)

In [None]:
from sklearn.metrics import classification_report
# Text report of per-class precision, recall, F1-score and support,
# computed from the cross-validated predictions.
print(classification_report(y_true=y, y_pred=predictions))

In [None]:
# Render the fitted tree's decision rules as plain text.
from sklearn.tree import export_text
r = export_text(decision_tree=tree, feature_names=feature_cols)
print(r)

In [None]:
# Import the plotting function by name. Importing the `tree` module here would rebind
# `tree` away from the fitted classifier, so the plot call (and every later cell that
# uses `tree`) would receive the module instead of the model.
from sklearn.tree import plot_tree

# Draw the fitted tree with matplotlib, labelling splits with the feature names.
fig, ax = plt.subplots(figsize=(14, 8))
plot_tree(tree, feature_names=list(feature_cols), filled=True, ax=ax)
plt.show()

In [None]:
#Visualization
#pip install graphviz -- for windows: conda install -c anaconda graphviz -- restart machine
#pip install pydotplus

In [None]:
# StringIO comes from the standard library: `sklearn.externals.six` was deprecated and
# then removed from scikit-learn (0.23), so importing it from there fails on current releases.
from io import StringIO
# Displays image data (here PNG bytes) inline in the notebook.
from IPython.display import Image
# Converts a decision tree classifier into Graphviz DOT source.
from sklearn.tree import export_graphviz
# Converts that DOT source into a PNG or another form displayable in Jupyter.
import pydotplus

In [None]:
# Write the tree's Graphviz DOT source into an in-memory text buffer.
dot_data = StringIO()
export_graphviz(
    tree,
    out_file=dot_data,
    feature_names=feature_cols,
    # Class names must be listed in ascending order of the class labels.
    class_names=['amphibian', 'bird', 'fish', 'insect', 'invertebrate', 'mammal', 'reptile'],
    filled=True,
    rounded=True,
    special_characters=True,
)

# Build the graph from the DOT source, save it to 'decisiontree.png',
# and display the rendered PNG as the cell output.
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('decisiontree.png')
Image(graph.create_png())