In [None]:
!pip -q install graphviz
!apt-get -qy install graphviz

In [None]:
from sklearn import tree, ensemble, metrics
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import graphviz

In [None]:
# Load the bundled iris data set and preview its feature names and first ten rows.
iris = load_iris()
print(iris.feature_names, iris.data[:10], sep="\n")

In [None]:
# Hold out 33% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=123,
    test_size=0.33,
)

In [None]:
# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html
# Small forest of shallow trees. random_state is fixed so the fitted trees, and
# therefore every metric derived from them, are reproducible on Restart & Run All.
clf = ensemble.RandomForestClassifier(n_estimators=10, max_depth=3, random_state=123)
clf = clf.fit(X_train, y_train)

In [None]:
# Visualise a single member of the forest.
tree_number = 1
chosen_tree = clf.estimators_[tree_number]

# Plain export (no labels), rendered to disk under the name "iris".
dot_data = tree.export_graphviz(chosen_tree, out_file=None)
graph = graphviz.Source(dot_data)
graph.render("iris")

# Labelled, colour-filled export; left as the final expression so the notebook displays it.
dot_data = tree.export_graphviz(
    chosen_tree,
    out_file=None,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = graphviz.Source(dot_data)
graph

In [None]:
# Predict labels for the held-out test split and keep the true labels alongside.
expected, predicted = y_test, clf.predict(X_test)

In [None]:
# Per-class quality metrics on the test split:
#   Precision = TP / (TP + FP)  -- penalises false positives
#   Recall    = TP / (TP + FN)  -- penalises false negatives
#   F1        = (2 * Recall * Precision) / (Recall + Precision)  -- harmonic mean of the two
report = metrics.classification_report(y_true=expected, y_pred=predicted)
print(report)

In [None]:
# Confusion matrix for the test split (first argument: true labels, second: predictions).
conf_matrix = metrics.confusion_matrix(y_true=expected, y_pred=predicted)
print(conf_matrix)

In [None]:
# Show the fitted forest's feature importances.
importances = clf.feature_importances_
print(importances)