# This is a small example showing classification of various fruits based on their size and color

In [1]:
from cuml import RandomForestClassifier as cuRF
import numpy as np

Here we map each fruit label to a consecutive integer, which is required by the Random Forest Classifier

In [2]:
# Assign each fruit name a consecutive integer class id, in listing order.
fruit_to_label = {
    fruit: class_id
    for class_id, fruit in enumerate(['apple', 'water melon', 'cherry', 'strawberry'])
}

The dataset below uses floating-point numbers for the fruit size and a (float-based) one-hot encoding for the different color types

In [3]:
# Column layout of every row:
#   red, green, blue  -> one-hot color flags
#   size (cm)         -> fruit size
#   fruit (label)     -> integer class id looked up in fruit_to_label
dataset = np.array([
    [1.0, 0.0, 0.0, 7.0, fruit_to_label['apple']],
    [0.0, 1.0, 0.0, 20.0, fruit_to_label['water melon']],
    [1.0, 0.0, 0.0, 1.0, fruit_to_label['cherry']],
    [0.0, 1.0, 0.0, 7.5, fruit_to_label['apple']],
    [1.0, 0.0, 0.0, 1.0, fruit_to_label['strawberry']],
    [1.0, 0.0, 0.0, 0.8, fruit_to_label['cherry']],
])

# Every column except the last is a feature; the last column holds the
# class label, which is cast to 32-bit integers.
X_train = dataset[..., :-1]
y_train = dataset[..., -1].astype(np.int32)

In [4]:
# Keyword arguments forwarded to the cuML random forest constructor.
cu_rf_params = dict(
    n_estimators=3,    # number of trees in the forest
    max_depth=8,       # maximum depth of each tree
    n_bins=2,          # number of bins used when computing split points
    n_streams=1,       # CUDA streams used for parallel processing on the GPU
    rows_sample=0.67,  # share of the input rows considered for each tree
    split_algo=0,      # split algorithm selector
)

In [5]:
# Build the cuML random forest classifier, unpacking the parameter dict
# into keyword arguments.
cu_rf = cuRF(**cu_rf_params)

In [6]:
# Train the forest on the feature matrix and the integer class labels.
# The call is left as the cell's last expression so its result is displayed.
cu_rf.fit(X_train, y_train)

  """Entry point for launching an IPython kernel.


RandomForestClassifier(n_estimators=3, max_depth=8, handle=<cuml.common.handle.Handle object at 0x7ff103e4a900>, max_features=1.0, n_bins=2, n_streams=1, split_algo=0, split_criterion=0, min_rows_per_node=2, bootstrap=True, bootstrap_features=False, verbose=False, rows_sample=0.67, max_leaves=-1, quantile_per_tree=False)

In [7]:
# Two unlabeled samples using the same feature columns as the training data:
# red, green, blue (one-hot color flags), size in cm.
X_test = np.array([
    [0.0, 1.0, 0.0, 18.0],  # green, 18 cm
    [1.0, 0.0, 0.0, 1.0],   # red, 1 cm
])

In [8]:
# Run the trained forest on the test samples; one prediction per row of X_test.
predictions = cu_rf.predict(X_test)
# The forest was fit on the integer class ids in y_train (taken from
# fruit_to_label), so the printed values are expected to be those ids.
print(predictions)

[1 3]
