Skip to content

Commit

Permalink
small code cleanup
Browse files: browse the repository at this point in the history
  • Loading branch information
Shihab-Shahriar committed May 29, 2020
1 parent 0b9ebf3 commit 30e4c49
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 19 deletions.
19 changes: 8 additions & 11 deletions examples/random_forest.py
@@ -1,9 +1,7 @@
from timeit import default_timer
start = default_timer()
import logging

import numpy as np
from sklearn.datasets import make_classification, load_boston, load_digits, load_breast_cancer, load_iris
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from sklearn.metrics import roc_auc_score, accuracy_score

Expand All @@ -23,16 +21,17 @@ def classification():
X, y = make_classification(
n_samples=500, n_features=10, n_informative=10, random_state=1111, n_classes=2, class_sep=2.5, n_redundant=0
)
#X,y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=1111)

model = RandomForestClassifier(n_estimators=5, max_depth=4)
model = RandomForestClassifier(n_estimators=10, max_depth=4)
model.fit(X_train, y_train)
predictions = model.predict(X_test)[:,1]
#predictions = np.argmax(model.predict(X_test),axis=1)
print(predictions.shape)
print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions))

predictions_prob = model.predict(X_test)[:, 1]
predictions = np.argmax(model.predict(X_test), axis=1)
#print(predictions.shape)
print("classification, roc auc score: %s" % roc_auc_score(y_test, predictions_prob))
print("classification, accuracy score: %s" % accuracy_score(y_test, predictions))


def regression():
Expand All @@ -51,5 +50,3 @@ def regression():
if __name__ == "__main__":
classification()
# regression()
end = default_timer()
print(end-start)
6 changes: 1 addition & 5 deletions mla/ensemble/random_forest.py
Expand Up @@ -80,14 +80,10 @@ def _predict(self, X=None):
for i in range(X.shape[0]):
row_pred = np.zeros(y_shape)
for tree in self.trees:
tmp = tree.predict_row(X[i, :])
print(tmp,row_pred.shape,row_pred)
row_pred += tmp

row_pred += tree.predict_row(X[i, :])

row_pred /= self.n_estimators
predictions[i, :] = row_pred
print(f"i={i},{row_pred}\n")
return predictions


Expand Down
5 changes: 2 additions & 3 deletions mla/ensemble/tree.py
Expand Up @@ -65,7 +65,7 @@ def _find_best_split(self, X, target, n_features):
return max_col, max_val, max_gain

def train(self, X, target, max_features=None, min_samples_split=10, max_depth=None,
minimum_gain=0.01, loss=None, n_classes = None):
minimum_gain=0.01, loss=None, n_classes=None):
"""Build a decision tree from training set.
Parameters
Expand All @@ -85,7 +85,7 @@ def train(self, X, target, max_features=None, min_samples_split=10, max_depth=No
Minimum gain required for splitting.
loss : function, default None
Loss function for gradient boosting.
n_classes : int, default None
n_classes : int or None
Number of unique labels in case of classification
"""

Expand Down Expand Up @@ -143,7 +143,6 @@ def _calculate_leaf_value(self, targets, n_classes):
self.outcome = np.mean(targets["y"])
else:
# Probability for classification task
#self.outcome = stats.itemfreq(targets["y"])[:, 1] / float(targets["y"].shape[0])
self.outcome = np.bincount(targets["y"], minlength=n_classes) / targets["y"].shape[0]

def predict_row(self, row):
Expand Down

0 comments on commit 30e4c49

Please sign in to comment.