In [1]:
import os
from joblib import dump

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, f1_score, classification_report

In [2]:
saved_model_path = os.path.join('..', 'jar', 'iris_classifier.joblib')

In [3]:
iris = load_iris()

In [4]:
labels = iris.target_names

In [5]:
X = iris['data']
y = iris['target']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [7]:
model = RandomForestClassifier(
    n_estimators=10,
    n_jobs=-1,
    verbose=True,
)

In [8]:
model.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.0s finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=-1,
            oob_score=False, random_state=None, verbose=True,
            warm_start=False)

In [9]:
y_hat = model.predict(X_test)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  10 out of  10 | elapsed:    0.0s finished


In [10]:
results = confusion_matrix(y_test, y_hat)

In [17]:
accuracy = accuracy_score(y_test, y_hat)
recall = recall_score(y_test, y_hat, average='macro')
f1 = f1_score(y_test, y_hat, average='macro')

print(f"Accuracy: {100 * accuracy:.2f}%")
print(f"Recall: {100 * recall:.2f}%")
print(f"F1-Score: {100 * f1:.2f}%")

Accuracy: 90.00%
Recall: 89.18%
F1-Score: 88.57%


In [18]:
print("Confusion Matrix:")
df = pd.DataFrame(results, columns=labels)
df.set_index(labels, inplace=True)
print(df.to_string(justify='center'), "\n")

Confusion Matrix:
            setosa  versicolor  virginica
setosa        12         0          0    
versicolor     0         9          2    
virginica      0         1          6     



In [12]:
dump(model, saved_model_path)

['../jar/iris_classifier.joblib']