In [5]:
import cupy as cp
from cupy import asnumpy 
from cuml.datasets.classification import make_classification
from cuml.preprocessing.model_selection import train_test_split
from cuml.ensemble import RandomForestClassifier as cuRF
from sklearn.metrics import balanced_accuracy_score as balanced
from cuml.metrics import confusion_matrix
import time

# Shape of the synthetic dataset.
n_samples = 2**20
n_features = 100
n_classes = 4
n_informative = 10

# Size and depth of the random forest.
n_estimators = 25
max_depth = 10

# Generate a synthetic multi-class classification dataset (n_classes labels).
X, y = make_classification(
    n_classes=n_classes,
    n_features=n_features,
    n_samples=n_samples,
    n_informative=n_informative,
    random_state=0,
)

# Split into training and held-out evaluation sets with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Fit the GPU random forest and predict labels for the held-out set.
model = cuRF(
    max_depth=max_depth,
    n_estimators=n_estimators,
    random_state=0,
)
trained_RF = model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Cast both label vectors to int64 before building the confusion matrix.
y_test = y_test.astype(cp.int64)
predictions = predictions.astype(cp.int64)

# Confusion matrix of true vs. predicted labels, plus the distinct labels
# that occur in the held-out ground truth.
M = confusion_matrix(
    y_test,
    predictions,
    labels=None,
    sample_weight=None,
    normalize=None,
)
classes = cp.unique(y_test)

def balanced_accuracy_score(conf_matrix, classes, adjusted=False):
    """Compute balanced accuracy (mean per-class recall) from a confusion matrix.

    Args:
        conf_matrix: 2-D array whose row ``i`` holds the counts for true class
            ``i`` and whose diagonal holds the correctly classified counts
            (row sum = TP + FN). Only the leading
            ``len(classes) x len(classes)`` sub-matrix is read.
        classes: Sequence of class labels; only its length is used.
        adjusted: When true, rescale the score so that chance-level
            performance maps to 0 while a perfect score stays at 1.

    Returns:
        The balanced accuracy as a scalar of the array library in use.

    Warns:
        UserWarning: If a class has no true samples (an all-zero row). Such
            classes are excluded, and the mean is taken over the remaining
            classes only.
    """
    # Local import: the file's import block does not bring `warnings` in.
    import warnings

    n_labels = len(classes)
    block = conf_matrix[:n_labels, :n_labels]
    true_pos = block.diagonal().astype(cp.float64)
    support = block.sum(axis=1).astype(cp.float64)  # TP + FN per true class

    # A class without true samples has an undefined recall (0 / 0); drop it
    # up front instead of dividing and filtering NaNs afterwards.
    seen = support != 0
    if not bool(seen.all()):
        warnings.warn('y_pred contains classes not in y_true')
    recalls = true_pos[seen] / support[seen]

    # Average over the classes that were kept, not over len(classes), so a
    # dropped class does not drag the score down.
    balanced_score = recalls.mean()
    if adjusted:
        chance = 1 / len(recalls)
        balanced_score = (balanced_score - chance) / (1 - chance)
    return balanced_score

# Time the GPU implementation. CuPy queues device work asynchronously, so the
# device is synchronized before each clock read; otherwise the interval could
# end before the queued kernels have finished.
# NOTE: M is built before this timed region, so constructing the confusion
# matrix is not included in the cuML figure.
cp.cuda.Stream.null.synchronize()
p = time.perf_counter()
cu = balanced_accuracy_score(M, classes)
cp.cuda.Stream.null.synchronize()
d = time.perf_counter()
print("Time consumed by cuML: ", d-p)

# Time the scikit-learn reference on host copies of the same labels.
# NOTE: the asnumpy() device-to-host copies happen inside the timed region.
e = time.perf_counter()
sk = balanced(asnumpy( y_test ), asnumpy( predictions ))
s = time.perf_counter()
print("Time consumed by sklearn: ", s-e)

# The two scores should agree up to floating-point rounding.
print("Sklearn output", sk)
print("cuML output", cu)



  model = cuRF( max_depth = max_depth,


Time consumed by cuML:  0.0014052391052246094
Time consumed by sklearn:  0.1957409381866455
Sklearn output 0.6492258356426274
cuML output 0.6492258356426273
