In [1]:
import os
import pickle
import pandas as pd
import numpy as np
import mercury as mr
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Configure the Mercury app shown to the user (title, description);
# continuous_update is switched off here.
app = mr.App(
    title="Train Random Forest",
    description="Train Random Forest on Iris dataset",
    continuous_update=False,
)

# Random Forest training

We will train a Random Forest model on the Iris dataset.

Please select:
- the size of the test dataset (as a fraction of all samples),
- the number of trees in the Random Forest,
- the maximum depth of the trees in the Random Forest,
- the output file name.

The model will be saved in the output directory.

In [3]:
# Load Iris as pandas objects: X receives the features, y the target labels.
X, y = load_iris(
    as_frame=True,
    return_X_y=True,
)

In [4]:
# User-selected share of the data held out for testing
# (0.1 to 0.9 in steps of 0.05, default 0.3).
test_size = mr.Numeric(
    label="Test size",
    value=0.3,
    min=0.1,
    max=0.9,
    step=0.05,
)

mercury.Numeric

In [5]:
# Split the data using the fraction chosen in the "Test size" widget.
# A fixed random_state keeps the split identical between runs, so changes in the
# reported accuracy come from the widget settings rather than from a reshuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size.value,
    random_state=42,  # arbitrary fixed seed for reproducibility
)

In [6]:
# Show how many rows ended up in the training split.
mr.Markdown(f"### Training samples {X_train.shape[0]}")

### Training samples 105

In [7]:
# Show how many rows ended up in the test split.
# (The heading previously said "Training samples" although it reports X_test.)
mr.Markdown(f"### Test samples {X_test.shape[0]}")

### Training samples 45

In [8]:
# Slider for the number of trees in the forest (1 to 20, default 1).
trees = mr.Slider(
    label="Number of trees",
    value=1,
    min=1,
    max=20,
)

mercury.Slider

In [9]:
# Slider for the maximum depth of each tree (1 to 5, default 1).
depth = mr.Slider(
    label="Max depth",
    value=1,
    min=1,
    max=5,
)

mercury.Slider

In [10]:
# Build the classifier from the slider values.
# A fixed random_state makes the forest deterministic for given settings, so the
# same widget values always produce the same model and accuracy.
rf = RandomForestClassifier(
    n_estimators=trees.value,
    max_depth=depth.value,
    random_state=42,  # arbitrary fixed seed for reproducibility
)

In [11]:
# Fit the forest on the training split.
rf.fit(X_train, y_train)

In [12]:
# Predict labels for the held-out test features.
y_predicted = rf.predict(X_test)

In [13]:
# Accuracy = share of test rows whose prediction equals the true label,
# i.e. the mean of the boolean match mask, rounded to 4 decimal places.
accuracy = np.round((y_test == y_predicted).mean(), 4)

In [14]:
# Display the accuracy measured on the test split.
mr.Markdown(f"## Model accuracy {accuracy}")

## Model accuracy 0.6667

In [15]:
# Mercury output directory object; its `.path` is the folder the model file is written to.
output_dir = mr.OutputDir()

In [16]:
# Text box for the model's file name (without extension); defaults to "random-forest".
filename = mr.Text(
    label="Model filename",
    value="random-forest",
)

mercury.Text

In [17]:
# Persist the trained model as "<name>.pickle" inside the Mercury output directory.
# The name comes from a free-text widget, so keep only its final path component:
# this stops values such as "../x" or an absolute path from writing outside the
# output directory. A blank name falls back to the widget's default.
model_name = os.path.basename(filename.value.strip()) or "random-forest"
model_path = os.path.join(output_dir.path, f"{model_name}.pickle")
with open(model_path, "wb") as fout:
    pickle.dump(rf, fout)
    