# Load Train Set

features (3681) | labels (1)

In [1]:
# import pandas as pd
import cudf
import cupy
import pandas as pd
import numpy as np

# df = pd.read_feather("dts/np_dataset_train.ftr")
df = pd.read_csv("dts/np_dataset_train.csv")
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40000 entries, 0 to 39999
Columns: 3684 entries, Unnamed: 0 to Label
dtypes: bool(1), float64(3682), int64(1)
memory usage: 1.1 GB


## train/test Split

In [2]:
from sklearn.model_selection import train_test_split

rs = 37

train, test = train_test_split(df, test_size=0.2, random_state=rs)
train.info()
test.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32000 entries, 15893 to 34703
Columns: 3684 entries, Unnamed: 0 to Label
dtypes: bool(1), float64(3682), int64(1)
memory usage: 899.4 MB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 8000 entries, 273 to 564
Columns: 3684 entries, Unnamed: 0 to Label
dtypes: bool(1), float64(3682), int64(1)
memory usage: 224.9 MB


## Classifier definition

In [3]:
from random import seed
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier as tree_classifier
from scipy import stats
import cuml

# from cuml import tree_classifier


class random_forest():
    def __init__(self, number_of_trees=1, number_of_iteractions=1, max_depth=1, random_state=1):
        self.iteractions = number_of_iteractions
        self.number_of_trees = number_of_trees
        self.max_depth = max_depth
        self.forest = []
        self.random_state = random_state

    def bootstrap(self, x_samples):
        """ Create subsets of the input, and returns the indexes 
            from the original sample."""
        index = []

        sample_length = int(np.floor(len(x_samples)/self.number_of_trees))
        np.random.RandomState(self.random_state)

        index = np.random.randint(
            0, sample_length, (self.number_of_trees, sample_length))

        return index

    def predict(self, X):
        y = []
        for tree in self.forest:
            y.append(tree.predict(X))
        mode_ = stats.mode(y, axis=0)
        mode_ = np.transpose(mode_[0])
        return mode_

    def fit(self, x_samples, y_samples):
        idx = self.bootstrap(x_samples)

        for t in range(self.number_of_trees):
            tree = tree_classifier(
                criterion="gini", max_depth=self.max_depth, random_state=self.random_state)
            tree.fit(x_samples.iloc[idx[t]], y_samples[idx[t]])
            self.forest.append(tree)


In [11]:
# from sklearn import svm
# from cuml.svm import SVC
# import random_forest as rf


clf = cuml.ensemble.RandomForestClassifier(
    n_estimators=100,
    random_state=rs
)

# clf = random_forest(number_of_trees=10, number_of_iteractions=1, max_depth=1)


  return func(**kwargs)


## Sending Data to GPU

In [12]:
x = cudf.DataFrame(train.iloc[:, :-1])
x = x.astype(cupy.float32)
y = cupy.array(train.iloc[:, -1])
y = y.astype(cupy.float32)

# x = pd.DataFrame(train.iloc[:, :-1])
# y = np.array(train.iloc[:, -1])

print(x.shape)
print(y.shape)


(32000, 3683)
(32000,)


## Model Fitting

In [13]:
clf.fit(x, y)

[W] [15:26:26.128251] Using experimental backend for growing trees



RandomForestClassifier()

## Testing

In [14]:
# clf.predict_model = 'CPU'
tmp = test.iloc[:, :-1].to_numpy()
tmp = np.float32(tmp)
# x_test = cupy.DataFrame(tmp)
# print(type(tmp))
# tmp
# f32 = tmp.astype(cupy.float32)
# f32
y_pred = clf.predict(tmp)


## Evaluating

In [16]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score



accuracy = accuracy_score(y_true=test.iloc[:, -1], y_pred=y_pred)
roc = roc_auc_score(test.iloc[:, -1], y_pred)
pre = precision_score(y_true=test.iloc[:, -1], y_pred=y_pred)
rec = recall_score(y_true=test.iloc[:, -1], y_pred=y_pred)


print("Metrics: \n Acc: {acc:.2f}, ROC: {roc:.2f}\n PRE: {pre:.2f}, REC: {rec:.2f}".format(acc=accuracy,roc=roc,pre=pre,rec=rec))


Metrics: 
 Acc: 1.00, ROC: 1.00
 PRE: 1.00, REC: 1.00


## SC graph

In [9]:
accs = []
rocs = []
pres = []
recs = []


for rs in range(100):
    train, test = train_test_split(df, test_size=0.2, random_state=rs)

    clf = cuml.ensemble.RandomForestClassifier(
        n_estimators=30,
        random_state=rs
    )

    x = cudf.DataFrame(train.iloc[:, :-1])
    x = x.astype(cupy.float32)
    y = cupy.array(train.iloc[:, -1])
    y = y.astype(cupy.float32)

    clf.fit(x, y)
    x_test = test.iloc[:, :-1].to_numpy()
    x_test = np.float32(x_test)
    y_pred = clf.predict(x_test)

    accuracy = accuracy_score(y_true=test.iloc[:, -1], y_pred=y_pred)
    accs.append(accuracy)
    roc = roc_auc_score(test.iloc[:, -1], y_pred)
    rocs.append(roc)
    pre = precision_score(y_true=test.iloc[:, -1], y_pred=y_pred)
    pres.append(pre)
    rec = recall_score(y_true=test.iloc[:, -1], y_pred=y_pred)
    recs.append(rec)

    print("{i}/100".format(i=rs+1))


  return func(**kwargs)


[W] [14:48:19.055493] Using experimental backend for growing trees

1/100


  return func(**kwargs)


[W] [14:48:26.588504] Using experimental backend for growing trees

2/100


  return func(**kwargs)


[W] [14:48:34.139396] Using experimental backend for growing trees

3/100


  return func(**kwargs)


[W] [14:48:41.683730] Using experimental backend for growing trees

4/100


  return func(**kwargs)


[W] [14:48:49.476011] Using experimental backend for growing trees

5/100


  return func(**kwargs)


[W] [14:48:56.916571] Using experimental backend for growing trees

6/100


  return func(**kwargs)


[W] [14:49:05.630447] Using experimental backend for growing trees

7/100


  return func(**kwargs)


[W] [14:49:13.771060] Using experimental backend for growing trees

8/100


  return func(**kwargs)


[W] [14:49:22.643161] Using experimental backend for growing trees

9/100


  return func(**kwargs)


[W] [14:49:34.237187] Using experimental backend for growing trees

10/100


  return func(**kwargs)


[W] [14:49:52.346491] Using experimental backend for growing trees

11/100


  return func(**kwargs)


[W] [14:50:12.299961] Using experimental backend for growing trees

12/100


  return func(**kwargs)


[W] [14:50:21.817884] Using experimental backend for growing trees

13/100


  return func(**kwargs)


[W] [14:50:34.837138] Using experimental backend for growing trees

14/100


  return func(**kwargs)


[W] [14:50:54.469484] Using experimental backend for growing trees

15/100


  return func(**kwargs)


[W] [14:51:06.302954] Using experimental backend for growing trees

16/100


  return func(**kwargs)


[W] [14:51:27.141269] Using experimental backend for growing trees

17/100


  return func(**kwargs)


[W] [14:51:41.592162] Using experimental backend for growing trees

18/100


  return func(**kwargs)


[W] [14:51:56.228759] Using experimental backend for growing trees

19/100


  return func(**kwargs)


[W] [14:52:19.401032] Using experimental backend for growing trees

20/100


  return func(**kwargs)


[W] [14:52:39.633042] Using experimental backend for growing trees

21/100


  return func(**kwargs)


[W] [14:53:00.707649] Using experimental backend for growing trees

22/100


  return func(**kwargs)


[W] [14:53:13.064860] Using experimental backend for growing trees

23/100


  return func(**kwargs)


[W] [14:53:29.882367] Using experimental backend for growing trees

24/100


  return func(**kwargs)


[W] [14:53:53.514970] Using experimental backend for growing trees

25/100


  return func(**kwargs)


[W] [14:54:10.035895] Using experimental backend for growing trees

26/100


  return func(**kwargs)


[W] [14:54:29.400365] Using experimental backend for growing trees

27/100


  return func(**kwargs)


[W] [14:54:46.175753] Using experimental backend for growing trees

28/100


  return func(**kwargs)


[W] [14:55:05.812566] Using experimental backend for growing trees

29/100


  return func(**kwargs)


[W] [14:55:16.878408] Using experimental backend for growing trees

30/100


  return func(**kwargs)


[W] [14:55:26.697249] Using experimental backend for growing trees

31/100


  return func(**kwargs)


[W] [14:55:36.873852] Using experimental backend for growing trees

32/100


  return func(**kwargs)


[W] [14:55:50.499094] Using experimental backend for growing trees

33/100


  return func(**kwargs)


[W] [14:56:07.668637] Using experimental backend for growing trees

34/100


  return func(**kwargs)


[W] [14:56:28.500776] Using experimental backend for growing trees

35/100


  return func(**kwargs)


[W] [14:56:46.593901] Using experimental backend for growing trees

36/100


  return func(**kwargs)


[W] [14:57:02.249555] Using experimental backend for growing trees

37/100


  return func(**kwargs)


[W] [14:57:24.984970] Using experimental backend for growing trees

38/100


  return func(**kwargs)


[W] [14:57:36.356818] Using experimental backend for growing trees

39/100


  return func(**kwargs)


[W] [14:57:46.090980] Using experimental backend for growing trees

40/100


  return func(**kwargs)


[W] [14:57:58.810363] Using experimental backend for growing trees

41/100


  return func(**kwargs)


[W] [14:58:19.009793] Using experimental backend for growing trees

42/100


  return func(**kwargs)


[W] [14:58:39.387040] Using experimental backend for growing trees

43/100


  return func(**kwargs)


[W] [14:59:02.713747] Using experimental backend for growing trees

44/100


  return func(**kwargs)


[W] [14:59:22.452989] Using experimental backend for growing trees

45/100


  return func(**kwargs)


[W] [14:59:41.941074] Using experimental backend for growing trees

46/100


  return func(**kwargs)


[W] [14:59:57.908045] Using experimental backend for growing trees

47/100


  return func(**kwargs)


[W] [15:00:17.274181] Using experimental backend for growing trees

48/100


  return func(**kwargs)


[W] [15:00:39.181725] Using experimental backend for growing trees

49/100


  return func(**kwargs)


[W] [15:01:01.049100] Using experimental backend for growing trees

50/100


  return func(**kwargs)


[W] [15:01:22.728336] Using experimental backend for growing trees

51/100


  return func(**kwargs)


[W] [15:01:41.132438] Using experimental backend for growing trees

52/100


  return func(**kwargs)


[W] [15:01:56.665016] Using experimental backend for growing trees

53/100


  return func(**kwargs)


[W] [15:02:19.939883] Using experimental backend for growing trees

54/100


  return func(**kwargs)


[W] [15:02:36.015584] Using experimental backend for growing trees

55/100


  return func(**kwargs)


[W] [15:02:55.135703] Using experimental backend for growing trees

56/100


  return func(**kwargs)


[W] [15:03:18.954815] Using experimental backend for growing trees

57/100


  return func(**kwargs)


[W] [15:03:37.147415] Using experimental backend for growing trees

58/100


  return func(**kwargs)


[W] [15:03:55.153997] Using experimental backend for growing trees

59/100


  return func(**kwargs)


[W] [15:04:14.040892] Using experimental backend for growing trees

60/100


  return func(**kwargs)


[W] [15:04:32.346485] Using experimental backend for growing trees

61/100


  return func(**kwargs)


[W] [15:04:50.044366] Using experimental backend for growing trees

62/100


  return func(**kwargs)


[W] [15:04:58.593417] Using experimental backend for growing trees

63/100


  return func(**kwargs)


[W] [15:05:15.718478] Using experimental backend for growing trees

64/100


  return func(**kwargs)


[W] [15:05:41.946729] Using experimental backend for growing trees

65/100


  return func(**kwargs)


[W] [15:06:00.171694] Using experimental backend for growing trees

66/100


  return func(**kwargs)


[W] [15:06:22.136571] Using experimental backend for growing trees

67/100


  return func(**kwargs)


[W] [15:06:42.244285] Using experimental backend for growing trees

68/100


  return func(**kwargs)


[W] [15:07:02.092212] Using experimental backend for growing trees

69/100


  return func(**kwargs)


[W] [15:07:21.487114] Using experimental backend for growing trees

70/100


  return func(**kwargs)


[W] [15:07:47.884883] Using experimental backend for growing trees

71/100


  return func(**kwargs)


[W] [15:08:02.017486] Using experimental backend for growing trees

72/100


  return func(**kwargs)


[W] [15:08:11.788720] Using experimental backend for growing trees

73/100


  return func(**kwargs)


[W] [15:08:26.566080] Using experimental backend for growing trees

74/100


  return func(**kwargs)


[W] [15:08:43.224938] Using experimental backend for growing trees

75/100


  return func(**kwargs)


[W] [15:08:53.756876] Using experimental backend for growing trees

76/100


  return func(**kwargs)


[W] [15:09:02.837409] Using experimental backend for growing trees

77/100


  return func(**kwargs)


[W] [15:09:13.065173] Using experimental backend for growing trees

78/100


  return func(**kwargs)


[W] [15:09:27.671862] Using experimental backend for growing trees

79/100


  return func(**kwargs)


[W] [15:09:40.456790] Using experimental backend for growing trees

80/100


  return func(**kwargs)


[W] [15:10:02.476629] Using experimental backend for growing trees

81/100


  return func(**kwargs)


[W] [15:10:21.973147] Using experimental backend for growing trees

82/100


  return func(**kwargs)


[W] [15:10:31.389048] Using experimental backend for growing trees

83/100


  return func(**kwargs)


[W] [15:10:41.598900] Using experimental backend for growing trees

84/100


  return func(**kwargs)


[W] [15:10:53.821466] Using experimental backend for growing trees

85/100


  return func(**kwargs)


[W] [15:11:22.277175] Using experimental backend for growing trees

86/100


  return func(**kwargs)


[W] [15:11:41.517476] Using experimental backend for growing trees

87/100


  return func(**kwargs)


[W] [15:11:58.397999] Using experimental backend for growing trees

88/100


  return func(**kwargs)


[W] [15:12:10.031702] Using experimental backend for growing trees

89/100


  return func(**kwargs)


[W] [15:12:19.538902] Using experimental backend for growing trees

90/100


  return func(**kwargs)


[W] [15:12:30.451605] Using experimental backend for growing trees

91/100


  return func(**kwargs)


[W] [15:12:45.926152] Using experimental backend for growing trees

92/100


  return func(**kwargs)


[W] [15:13:08.708454] Using experimental backend for growing trees

93/100


  return func(**kwargs)


[W] [15:13:30.254611] Using experimental backend for growing trees

94/100


  return func(**kwargs)


[W] [15:13:42.104052] Using experimental backend for growing trees

95/100


  return func(**kwargs)


[W] [15:14:03.080812] Using experimental backend for growing trees

96/100


  return func(**kwargs)


[W] [15:14:21.136641] Using experimental backend for growing trees

97/100


  return func(**kwargs)


[W] [15:14:31.770831] Using experimental backend for growing trees

98/100


  return func(**kwargs)


[W] [15:14:50.498112] Using experimental backend for growing trees

99/100


  return func(**kwargs)


[W] [15:15:09.891279] Using experimental backend for growing trees

100/100


In [10]:


# import matplotlib.pyplot as plt


# plt.stairs(accs)

# npAccs = np.array(accs)
# npRocs = np.array(rocs)
# npPres = np.array(pres)
# npRecs = np.array(recs)

def min_max(name,array):
    a = np.array(array)
    print("{name}: ({min:.6f},{max:.1f}|{mean:.6f})".format(
        name=name, min=np.min(a), max=np.max(a), mean=np.mean(a)))

min_max("Acc",accs)
min_max("Roc", rocs)
min_max("Pre", pres)
min_max("Rec", recs)


Acc: (0.996000,1.0|0.998907)
Roc: (0.995986,1.0|0.998905)
Pre: (0.996578,1.0|0.998760)
Rec: (0.995165,1.0|0.999059)
