
Commit f9bf4f2
made python unit tests work with new test server setup
janvanrijn committed Mar 24, 2017
1 parent 7236528 commit f9bf4f2
Showing 5 changed files with 60 additions and 42 deletions.
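For context, every test below presumes the client is pointed at the OpenML test server rather than the live site. A minimal sketch of that setup (the URL and key are placeholder assumptions, not taken from this commit; the suite configures them in its own setup code):

    import openml

    # Assumed test-server coordinates; adjust to the server under test.
    openml.config.server = "https://test.openml.org/api/v1/xml"
    openml.config.apikey = "<test-server API key>"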
14 changes: 7 additions & 7 deletions tests/test_datasets/test_dataset_functions.py
@@ -90,13 +90,13 @@ def test_list_datasets(self):
         # data from the internet...
         datasets = openml.datasets.list_datasets()
         # 1087 as the number of datasets on openml.org
-        self.assertGreaterEqual(len(datasets), 1087)
+        self.assertGreaterEqual(len(datasets), 100)
         for did in datasets:
             self._check_dataset(datasets[did])

     def test_list_datasets_by_tag(self):
-        datasets = openml.datasets.list_datasets(tag='uci')
-        self.assertGreaterEqual(len(datasets), 5)
+        datasets = openml.datasets.list_datasets(tag='study_14')
+        self.assertGreaterEqual(len(datasets), 100)
         for did in datasets:
             self._check_dataset(datasets[did])

@@ -153,20 +153,20 @@ def test_get_dataset(self):
             openml.config.get_cache_directory(), "datasets", "1", "qualities.xml")))

     def test_get_dataset_with_string(self):
-        dataset = openml.datasets.get_dataset(373)
+        dataset = openml.datasets.get_dataset(101)
         self.assertRaises(PyOpenMLError, dataset._get_arff, 'arff')
         self.assertRaises(PyOpenMLError, dataset.get_data)

     def test_get_dataset_sparse(self):
-        dataset = openml.datasets.get_dataset(1571)
+        dataset = openml.datasets.get_dataset(102)
         X = dataset.get_data()
         self.assertIsInstance(X, scipy.sparse.csr_matrix)

     def test_download_rowid(self):
         # Smoke test which checks that the dataset has the row-id set correctly
-        did = 164
+        did = 44
         dataset = openml.datasets.get_dataset(did)
-        self.assertEqual(dataset.row_id_attribute, 'instance')
+        self.assertEqual(dataset.row_id_attribute, 'Counter')

     def test__get_dataset_description(self):
         description = _get_dataset_description(self.workdir, 2)
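For reference, the sparse-dataset test above reduces to this access pattern; a sketch assuming dataset 102 is a sparse dataset on the test server:

    import openml
    import scipy.sparse

    dataset = openml.datasets.get_dataset(102)  # assumed: sparse dataset on the test server
    X = dataset.get_data()                      # sparse ARFF data comes back as a csr_matrix
    assert isinstance(X, scipy.sparse.csr_matrix)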
3 changes: 2 additions & 1 deletion tests/test_flows/test_flow.py
@@ -104,7 +104,8 @@ def test_get_flow(self):
     def test_from_xml_to_xml(self):
         # Get the raw xml thing
         # TODO maybe get this via get_flow(), which would have to be refactored to allow getting only the xml dictionary
-        for flow_id in [1185, 1244, 1196, 1112, ]:
+        # TODO: no sklearn flows.
+        for flow_id in [3, 5, 7, 9, ]:
             flow_xml = _perform_api_call("flow/%d" % flow_id)
             flow_dict = xmltodict.parse(flow_xml)

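The XML round trip exercised by test_from_xml_to_xml can be sketched without the client's internal _perform_api_call helper (the raw endpoint URL below is an assumption):

    import requests
    import xmltodict

    # Assumed raw endpoint for flow 3; the test itself goes through the
    # client's own _perform_api_call("flow/3") instead.
    flow_xml = requests.get("https://test.openml.org/api/v1/xml/flow/3").text
    flow_dict = xmltodict.parse(flow_xml)     # XML -> nested OrderedDict
    roundtrip = xmltodict.unparse(flow_dict)  # and back to an XML string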
67 changes: 44 additions & 23 deletions tests/test_runs/test_run_functions.py
@@ -31,7 +31,7 @@ def _perform_run(self, task_id, num_instances, clf):
         return run

     def test_run_regression_on_classif_task(self):
-        task_id = 10107
+        task_id = 115

         clf = LinearRegression()
         task = openml.tasks.get_task(task_id)
@@ -43,7 +43,7 @@ def test_run_regression_on_classif_task(self):

     @mock.patch('openml.flows.sklearn_to_flow')
     def test_check_erronous_sklearn_flow_fails(self, sklearn_to_flow_mock):
-        task_id = 10107
+        task_id = 115
         task = openml.tasks.get_task(task_id)

         # Invalid parameter values
@@ -52,16 +52,16 @@ def test_check_erronous_sklearn_flow_fails(self, sklearn_to_flow_mock):
         self.assertRaisesRegexp(ValueError, "Penalty term must be positive; got \(C='abc'\)",
                                 openml.runs.run_task, task=task, model=clf)

-    def test_run_iris(self):
-        task_id = 10107
-        num_instances = 150
+    def test_run_diabetes(self):
+        task_id = 115
+        num_instances = 768

         clf = LogisticRegression()
         self._perform_run(task_id,num_instances, clf)

     def test_run_optimize_randomforest_iris(self):
-        task_id = 10107
-        num_instances = 150
+        task_id = 115
+        num_instances = 768
         num_folds = 10
         num_iterations = 5
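All of these smoke tests funnel through the same call; a minimal sketch, assuming task 115 is the 768-instance diabetes classification task on the test server:

    import openml
    from sklearn.linear_model import LogisticRegression

    task = openml.tasks.get_task(115)  # assumed: classification task on the test server
    run = openml.runs.run_task(task=task, model=LogisticRegression())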

@@ -80,8 +80,8 @@ def test_run_optimize_randomforest_iris(self):
         self.assertEqual(len(run.trace_content), num_iterations * num_folds)

     def test_run_optimize_bagging_iris(self):
-        task_id = 10107
-        num_instances = 150
+        task_id = 115
+        num_instances = 768
         num_folds = 10
         num_iterations = 9 # (num values for C times gamma)

@@ -94,8 +94,8 @@ def test_run_optimize_bagging_iris(self):
         self.assertEqual(len(run.trace_content), num_iterations * num_folds)

     def test_run_pipeline(self):
-        task_id = 10107
-        num_instances = 150
+        task_id = 115
+        num_instances = 768
         num_folds = 10
         num_iterations = 9 # (num values for C times gamma)

@@ -107,8 +107,11 @@ def test_run_pipeline(self):
         self.assertEqual(run.trace_content, None)

     def test__run_task_get_arffcontent(self):
-        task = openml.tasks.get_task(1939)
+        task = openml.tasks.get_task(7)
         class_labels = task.class_labels
+        num_instances = 3196
+        num_folds = 10
+        num_repeats = 1

         clf = SGDClassifier(loss='hinge', random_state=1)
         self.assertRaisesRegexp(AttributeError,
@@ -125,20 +128,24 @@ def test__run_task_get_arffcontent(self):
         self.assertIsInstance(arff_tracecontent, type(None))

         # 10 times 10 fold CV of 150 samples
-        self.assertEqual(len(arff_datacontent), 1500)
+        self.assertEqual(len(arff_datacontent), num_instances * num_repeats)
         for arff_line in arff_datacontent:
-            self.assertEqual(len(arff_line), 8)
+            print(arff_line)
+            # check number columns
+            self.assertEqual(len(arff_line), 7)
+            # check repeat
             self.assertGreaterEqual(arff_line[0], 0)
-            self.assertLessEqual(arff_line[0], 9)
+            self.assertLessEqual(arff_line[0], num_repeats - 1)
+            # check fold
             self.assertGreaterEqual(arff_line[1], 0)
-            self.assertLessEqual(arff_line[1], 9)
+            self.assertLessEqual(arff_line[1], num_folds - 1)
+            # check row id
             self.assertGreaterEqual(arff_line[2], 0)
-            self.assertLessEqual(arff_line[2], 149)
-            self.assertAlmostEqual(sum(arff_line[3:6]), 1.0)
-            self.assertIn(arff_line[6], ['Iris-setosa', 'Iris-versicolor',
-                                         'Iris-virginica'])
-            self.assertIn(arff_line[7], ['Iris-setosa', 'Iris-versicolor',
-                                         'Iris-virginica'])
+            self.assertLessEqual(arff_line[2], num_instances - 1)
+            # check confidences
+            self.assertAlmostEqual(sum(arff_line[3:5]), 1.0)
+            self.assertIn(arff_line[5], ['won', 'nowin'])
+            self.assertIn(arff_line[6], ['won', 'nowin'])

     def test_get_run(self):
         # this run is not available on test
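Spelled out, the per-line assertions above pin down this row layout; a standalone sketch using the constants from the test:

    def check_prediction_line(line, num_repeats=1, num_folds=10, num_instances=3196):
        # Row layout: repeat, fold, row id, one confidence per class,
        # predicted label, true label.
        class_labels = ['won', 'nowin']
        assert len(line) == 7                     # number of columns
        assert 0 <= line[0] <= num_repeats - 1    # repeat
        assert 0 <= line[1] <= num_folds - 1      # fold
        assert 0 <= line[2] <= num_instances - 1  # row id
        assert abs(sum(line[3:5]) - 1.0) < 1e-7   # confidences sum to one
        assert line[5] in class_labels            # predicted label
        assert line[6] in class_labels            # ground-truth label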
@@ -163,12 +170,16 @@ def _check_run(self, run):
         self.assertEqual(len(run), 5)

     def test_get_runs_list(self):
+        # TODO: comes from live, no such lists on test
+        openml.config.server = self.production_server
         runs = openml.runs.list_runs(id=[2])
         self.assertEqual(len(runs), 1)
         for rid in runs:
             self._check_run(runs[rid])

     def test_get_runs_list_by_task(self):
+        # TODO: comes from live, no such lists on test
+        openml.config.server = self.production_server
         task_ids = [20]
         runs = openml.runs.list_runs(task=task_ids)
         self.assertGreaterEqual(len(runs), 590)
@@ -185,6 +196,8 @@ def test_get_runs_list_by_task(self):
             self._check_run(runs[rid])

     def test_get_runs_list_by_uploader(self):
+        # TODO: comes from live, no such lists on test
+        openml.config.server = self.production_server
         # 29 is Dominik Kirchhoff - Joaquin and Jan have too many runs right now
         uploader_ids = [29]

@@ -204,6 +217,8 @@ def test_get_runs_list_by_uploader(self):
             self._check_run(runs[rid])

     def test_get_runs_list_by_flow(self):
+        # TODO: comes from live, no such lists on test
+        openml.config.server = self.production_server
         flow_ids = [1154]
         runs = openml.runs.list_runs(flow=flow_ids)
         self.assertGreaterEqual(len(runs), 1)
@@ -220,6 +235,8 @@ def test_get_runs_list_by_flow(self):
             self._check_run(runs[rid])

     def test_get_runs_pagination(self):
+        # TODO: comes from live, no such lists on test
+        openml.config.server = self.production_server
         uploader_ids = [1]
         size = 10
         max = 100
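The pagination contract this test relies on, in isolation (parameter names as used above; the server is assumed to cap each page at the requested size):

    import openml

    size, maximum = 10, 100
    for offset in range(0, maximum, size):
        runs = openml.runs.list_runs(offset=offset, size=size, uploader=[1])
        assert len(runs) <= size  # each page holds at most 'size' runs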
@@ -230,9 +247,11 @@ def test_get_runs_pagination(self):
             self.assertIn(runs[rid]["uploader"], uploader_ids)

     def test_get_runs_list_by_filters(self):
+        # TODO: comes from live, no such lists on test
+        openml.config.server = self.production_server
         ids = [505212, 6100]
         tasks = [2974, 339]
-        uploaders_1 = [1, 17]
+        uploaders_1 = [1, 2]
         uploaders_2 = [29, 274]
         flows = [74, 1718]

@@ -253,6 +272,8 @@ def test_get_runs_list_by_filters(self):
         runs = openml.runs.list_runs(id=ids, task=tasks, uploader=uploaders_1)

     def test_get_runs_list_by_tag(self):
+        # TODO: comes from live, no such lists on test
+        openml.config.server = self.production_server
         runs = openml.runs.list_runs(tag='curves')
         self.assertGreaterEqual(len(runs), 1)

10 changes: 3 additions & 7 deletions tests/test_tasks/test_task.py
@@ -14,10 +14,6 @@

 class OpenMLTaskTest(TestBase):

-    def test_get_clustering_task(self):
-        self.assertRaisesRegexp(KeyError, 'oml:target_feature',
-                                openml.tasks.get_task, 10128)
-
     @mock.patch('openml.datasets.get_dataset', autospec=True)
     def test_get_dataset(self, patch):
         patch.return_value = mock.MagicMock()
@@ -40,11 +36,11 @@ def test_get_X_and_Y(self):
         self.assertEqual(Y.dtype, int)

         # Regression task
-        task = openml.tasks.get_task(2280)
+        task = openml.tasks.get_task(631)
         X, Y = task.get_X_and_y()
-        self.assertEqual((8192, 8), X.shape)
+        self.assertEqual((52, 2), X.shape)
         self.assertIsInstance(X, np.ndarray)
-        self.assertEqual((8192,), Y.shape)
+        self.assertEqual((52,), Y.shape)
         self.assertIsInstance(Y, np.ndarray)
         self.assertEqual(Y.dtype, float)

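In isolation, the regression branch of test_get_X_and_Y amounts to the following; task 631 is assumed to be a 52-instance, 2-feature regression task on the test server:

    import numpy as np
    import openml

    task = openml.tasks.get_task(631)  # assumed regression task on the test server
    X, y = task.get_X_and_y()
    assert isinstance(X, np.ndarray) and X.shape == (52, 2)
    assert isinstance(y, np.ndarray) and y.shape == (52,)
    assert y.dtype == float           # regression targets come back as floats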
8 changes: 4 additions & 4 deletions tests/test_tasks/test_task_functions.py
@@ -59,15 +59,15 @@ def test_list_tasks_by_type(self):
             self._check_task(tasks[tid])

     def test_list_tasks_by_tag(self):
-        num_basic_tasks = 54 # number is flexible, check server if fails
-        tasks = openml.tasks.list_tasks(tag='basic')
+        num_basic_tasks = 100 # number is flexible, check server if fails
+        tasks = openml.tasks.list_tasks(tag='study_14')
         self.assertGreaterEqual(len(tasks), num_basic_tasks)
         for tid in tasks:
             self._check_task(tasks[tid])

     def test_list_tasks(self):
         tasks = openml.tasks.list_tasks()
-        self.assertGreaterEqual(len(tasks), 2000)
+        self.assertGreaterEqual(len(tasks), 900)
         for tid in tasks:
             self._check_task(tasks[tid])

@@ -83,7 +83,7 @@ def test_list_tasks_paginate(self):
     def test_list_tasks_per_type_paginate(self):
         size = 10
         max = 100
-        task_types = 5
+        task_types = 4
         for j in range(1,task_types):
             for i in range(0, max, size):
                 tasks = openml.tasks.list_tasks(task_type_id=j, offset=i, size=size)
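The nested per-type pagination in that last test reduces to this pattern (task type ids 1 through 3 are assumed to exist on the test server):

    import openml

    size, maximum, task_types = 10, 100, 4
    for task_type_id in range(1, task_types):
        for offset in range(0, maximum, size):
            tasks = openml.tasks.list_tasks(task_type_id=task_type_id,
                                            offset=offset, size=size)
            assert len(tasks) <= size  # each page holds at most 'size' tasks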
