In [None]:
import random

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import export_text, plot_tree
from sklearn import metrics
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt



# Seed Python's global RNG with a fixed value so that any later draw from the
# `random` module is repeatable after Restart Kernel -> Run All. Calling
# random.seed() with no argument reseeds from OS entropy / the system clock,
# which makes every run of the notebook produce different numbers.
random.seed(42)

In [None]:
# Read the iris table and replace the textual 'class' column with integer codes:
# "setosa" -> 0, "versicolor" -> 1, any other value -> 2.
df = pd.read_csv('../data/iris.csv')

is_setosa = df["class"] == "setosa"
is_versicolor = df["class"] == "versicolor"
df['class'] = np.where(is_setosa, 0, np.where(is_versicolor, 1, 2))

df

In [None]:
# Build the feature matrix and target vector, hold out 20% of the rows for
# testing, fit a 100-tree random forest and report accuracy on the training part.

# Fixed seed for the train/test split. Taking the seed from random.randrange()
# gives a different split (and therefore different metrics) on every execution
# of this cell, even though the forest itself is pinned to random_state=42.
SPLIT_SEED = 42

independent_variables = df.drop('class', axis=1)  # every column except the target
x = independent_variables.values
y = df['class'].values

x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    train_size=0.8, test_size=0.2,
    random_state=SPLIT_SEED,
    shuffle=True,
)

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(x_train, y_train)

# Predict on both partitions; the test predictions are evaluated in later cells.
y_pred_train = rf_classifier.predict(x_train)
y_pred_test = rf_classifier.predict(x_test)

print('Training Accuracy {}'.format(metrics.accuracy_score(y_train, y_pred_train)))

In [None]:
# Confusion matrix for the training split, with classes in the order 0, 1, 2.
train_confusion = metrics.confusion_matrix(y_train, y_pred_train, labels=[0, 1, 2])
print(f'Training Confusion = \n{train_confusion}')

In [None]:
# Text summary of per-class metrics on the training split.
train_report = classification_report(y_true=y_train, y_pred=y_pred_train)
print(train_report)

In [None]:
# Overall accuracy on the held-out test split.
test_accuracy = metrics.accuracy_score(y_test, y_pred_test)
print(f'Test Accuracy {test_accuracy}')

In [None]:
# Confusion matrix for the test split, with classes in the order 0, 1, 2.
test_confusion = metrics.confusion_matrix(y_test, y_pred_test, labels=[0, 1, 2])
print(f'Test Confusion = \n{test_confusion}')

In [None]:
# Text summary of per-class metrics on the held-out test split.
test_report = classification_report(y_true=y_test, y_pred=y_pred_test)
print(test_report)

In [None]:
# Print the decision rules of the first tree in the forest as indented text.
feature_names = df.drop(columns='class').columns.tolist()  # all columns but the target
tree_0 = rf_classifier.estimators_[0]
tree_structure = export_text(tree_0, feature_names=feature_names)
print(tree_structure)

In [None]:
# Draw the first tree of the forest on a wide (20 x 10 inch) figure.
feature_names = df.drop(columns='class').columns.tolist()  # all columns but the target
plt.figure(figsize=(20, 10))
plot_tree(
    tree_0,
    feature_names=feature_names,
    class_names=True,
    filled=True,
)
plt.show()

In [None]:
# View the structure of the last tree in the forest.
# Index -1 always selects the final estimator; a literal 99 is only correct
# when the forest has exactly 100 trees (IndexError with fewer, and not the
# last tree with more). The name `tree_99` is kept because the plotting cell
# that follows reads it.
tree_99 = rf_classifier.estimators_[-1]
tree_structure = export_text(tree_99, feature_names=list(df.drop('class', axis=1).columns))
print(tree_structure)

In [None]:
# Draw the tree held in `tree_99` on a wide (20 x 10 inch) figure.
feature_names = df.drop(columns='class').columns.tolist()  # all columns but the target
plt.figure(figsize=(20, 10))
plot_tree(
    tree_99,
    feature_names=feature_names,
    class_names=True,
    filled=True,
)
plt.show()