In [26]:
from cuml.datasets.classification import make_classification
from cuml.preprocessing.model_selection import train_test_split
from cuml.ensemble import RandomForestClassifier as cuRF
from sklearn.metrics import zero_one_loss
from sklearn.metrics import accuracy_score
import cupy as cp
from cupy import asnumpy
import time

In [27]:
n_samples = 2**18
n_features = 10
n_classes = 2

# random forest depth and size
n_estimators = 25
max_depth = 10

In [28]:
X, y = make_classification ( n_classes = n_classes,
                             n_features = n_features,
                             n_samples = n_samples,
                             random_state = 0 )

X_train, X_test, y_train, y_test = train_test_split( X, y, random_state = 0 )

model = cuRF( max_depth = max_depth, 
              n_estimators = n_estimators,
             )

trained_RF = model.fit ( X_train, y_train )

predictions = model.predict ( X_test )

In [29]:
def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None):
    score = accuracy_score(y_true, y_pred)
    if normalize:
        return 1 - score
    else:
        if sample_weight is not None:
            samples = cp.sum(sample_weight)
        else:
            samples = len(y_true)
            print(samples)
        return samples - score

In [30]:
y_test = cp.asnumpy(y_test)
predictions = cp.asnumpy(predictions)


p = time.time()
sk_score = zero_one_loss( asnumpy( y_test ), asnumpy( predictions ) )
d = time.time()
print("Time consumed by sklearn: ", d-p)
s = time.time()
cu_score = zero_one_loss( y_test, predictions)
e = time.time()
print("Time consumed by cuml: ", e-s)

print( " cuml accuracy: ", cu_score )
print( " sklearn accuracy : ", sk_score )

Time consumed by sklearn:  0.006992340087890625
Time consumed by cuml:  0.006275653839111328
 cuml accuracy:  0.0160369873046875
 sklearn accuracy :  0.0160369873046875
