
Commit b250547: Structured test cases
janvanrijn committed Sep 29, 2016
1 parent 0f58378 commit b250547
Showing 2 changed files with 42 additions and 15 deletions.
1 change: 0 additions & 1 deletion openml/runs/run.py
@@ -120,7 +120,6 @@ def _generate_trace_arff_dict(self, model):
         arff_dict['data'] = self.trace_content
         arff_dict['relation'] = 'openml_task_' + str(self.task_id) + '_predictions'
 
-        print(arff_dict)
         return arff_dict
 
     def publish(self):
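Side note (not part of this commit): the deleted line was a leftover debug print of the trace ARFF dictionary. If that output is still needed while debugging, a minimal sketch using Python's standard logging module could stand in for it; the dictionary contents below are illustrative only.

    import logging

    logger = logging.getLogger(__name__)

    # Illustrative stand-in for the dict built by _generate_trace_arff_dict.
    arff_dict = {'relation': 'openml_task_10107_predictions', 'data': []}

    # Emit the trace ARFF dict at DEBUG level instead of printing to stdout.
    logger.debug('trace arff dict: %s', arff_dict)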
56 changes: 42 additions & 14 deletions tests/runs/test_run_functions.py
@@ -1,38 +1,66 @@
 from sklearn.linear_model import LogisticRegression, SGDClassifier
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import RandomizedSearchCV
+from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
+from sklearn.svm import SVC
+from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
 import openml
 from openml.testing import TestBase
 
 
 class TestRun(TestBase):
-    def test_run_iris(self):
-        task = openml.tasks.get_task(10107)
-        clf = LogisticRegression()
+
+    def _perform_run(self, task_id, num_instances, clf):
+        task = openml.tasks.get_task(task_id)
         run = openml.runs.run_task(task, clf)
         run_ = run.publish()
         self.assertEqual(run_, run)
         self.assertIsInstance(run.dataset_id, int)
 
-    def test_run_optimize_randomforest_iris(self):
-        task = openml.tasks.get_task(10107)
-        numIterations = 5
+        # check arff output
+        self.assertEqual(len(run.data_content), num_instances)
+        return run
+
+
+    def test_run_iris(self):
+        task_id = 10107
+        num_instances = 150
 
-        clf = RandomForestClassifier(n_estimators=numIterations)
+        clf = LogisticRegression()
+        self._perform_run(task_id,num_instances, clf)
+
+
+    def test_run_optimize_randomforest_iris(self):
+        task_id = 10107
+        num_instances = 150
+        num_folds = 10
+        num_iterations = 5
+
+        clf = RandomForestClassifier(n_estimators=10)
         param_dist = {"max_depth": [3, None],
                       "max_features": [1,2,3,4],
                       "min_samples_split": [1,2,3,4,5,6,7,8,9,10],
                       "min_samples_leaf": [1,2,3,4,5,6,7,8,9,10],
                       "bootstrap": [True, False],
                       "criterion": ["gini", "entropy"]}
-        random_search = RandomizedSearchCV(clf, param_dist,n_iter=20)
+        random_search = RandomizedSearchCV(clf, param_dist,n_iter=num_iterations)
+
+        run = self._perform_run(task_id,num_instances, random_search)
+        self.assertEqual(len(run.trace_content), num_iterations * num_folds)
+
+    def test_run_optimize_bagging_iris(self):
+        task_id = 10107
+        num_instances = 150
+        num_folds = 10
+        num_iterations = 36 # (num values for C times gamma)
+
+        task = openml.tasks.get_task(task_id)
+        bag = BaggingClassifier(base_estimator=SVC())
+        param_dist = {"base_estimator__C": [0.001, 0.01, 0.1, 1, 10, 100],
+                      "base_estimator__gamma": [0.001, 0.01, 0.1, 1, 10, 100]}
+        grid_search = GridSearchCV(bag, param_dist)
+
+        run = self._perform_run(task_id, num_instances, grid_search)
+        self.assertEqual(len(run.trace_content), num_iterations * num_folds)
 
-        run = openml.runs.run_task(task, random_search)
-        run_ = run.publish()
-        self.assertEqual(run_, run)
-        self.assertIsInstance(run.dataset_id, int)
-
     def test__run_task_get_arffcontent(self):
         task = openml.tasks.get_task(1939)
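Side note (not part of this commit): the new trace-length assertions follow from candidate count times fold count. The randomized-search test expects 5 iterations * 10 folds = 50 trace rows, and the grid-search test expects 36 * 10 = 360 rows because GridSearchCV enumerates every combination of the 6 C values and 6 gamma values. A minimal sketch that double-checks the candidate count with scikit-learn's ParameterGrid (variable names are illustrative):

    from sklearn.model_selection import ParameterGrid

    # Same parameter grid as in test_run_optimize_bagging_iris.
    param_dist = {"base_estimator__C": [0.001, 0.01, 0.1, 1, 10, 100],
                  "base_estimator__gamma": [0.001, 0.01, 0.1, 1, 10, 100]}

    num_candidates = len(ParameterGrid(param_dist))  # 6 * 6 = 36
    num_folds = 10
    print(num_candidates, num_candidates * num_folds)  # 36 360

The _perform_run helper keeps the shared run-publish-assert sequence in one place, so each test only sets up its classifier and the expected instance and trace counts.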
