Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
import numpy as np
import warnings
import sklearn
import time
from sklearn.model_selection._search import BaseSearchCV

from ..exceptions import PyOpenMLError
from .. import config
from ..flows import sklearn_to_flow, get_flow, flow_exists
from ..setups import setup_exists
from ..exceptions import OpenMLCacheException, OpenMLServerException
from ..util import URLError
from ..util import URLError, version_complies
from ..tasks.functions import _create_task_from_xml
from .._api_calls import _perform_api_call
from .run import OpenMLRun
Expand Down Expand Up @@ -68,7 +70,6 @@ def run_task(task, model, avoid_duplicate_runs=True):
run = OpenMLRun(task_id=task.task_id, flow_id=None, dataset_id=dataset.dataset_id, model=model)
run.data_content, run.trace_content, run.trace_attributes = _run_task_get_arffcontent(model, task, class_labels)


if flow_id == False:
# means the flow did not exists. As we could run it, publish it now
flow = flow.publish()
Expand Down Expand Up @@ -151,6 +152,7 @@ def _run_task_get_arffcontent(model, task, class_labels):
X, Y = task.get_X_and_y()
arff_datacontent = []
arff_tracecontent = []
user_defined_measures = defaultdict(lambda: defaultdict(dict))

rep_no = 0
# TODO use different iterator to only provide a single iterator (less
Expand All @@ -166,8 +168,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
testY = Y[test_indices]

try:
# for measuring runtime. Only available since Python 3.3
if version_complies(3, 3):
modelfit_starttime = time.process_time()
model_fold.fit(trainX, trainY)

if version_complies(3, 3):
modelfit_duration = (time.process_time() - modelfit_starttime) * 1000
user_defined_measures['usercpu_time_millis_training'][rep_no][fold_no] = modelfit_duration

if isinstance(model_fold, sklearn.model_selection._search.BaseSearchCV):
arff_tracecontent.extend(_extract_arfftrace(model_fold, rep_no, fold_no))
model_classes = model_fold.best_estimator_.classes_
Expand All @@ -177,8 +186,15 @@ def _run_task_get_arffcontent(model, task, class_labels):
# typically happens when training a regressor on classification task
raise PyOpenMLError(str(e))

if version_complies(3, 3):
modelpredict_starttime = time.process_time()
ProbaY = model_fold.predict_proba(testX)
PredY = model_fold.predict(testX)
if version_complies(3, 3):
modelpredict_duration = (time.process_time() - modelpredict_starttime) * 1000
user_defined_measures['usercpu_time_millis_testing'][rep_no][fold_no] = modelpredict_duration
user_defined_measures['usercpu_time_millis'][rep_no][fold_no] = modelfit_duration + modelpredict_duration

if ProbaY.shape[1] != len(class_labels):
warnings.warn("Repeat %d Fold %d: estimator only predicted for %d/%d classes!" %(rep_no, fold_no, ProbaY.shape[1], len(class_labels)))

Expand All @@ -195,7 +211,6 @@ def _run_task_get_arffcontent(model, task, class_labels):
else:
arff_tracecontent = None
arff_trace_attributes = None

return arff_datacontent, arff_tracecontent, arff_trace_attributes


Expand Down Expand Up @@ -397,7 +412,7 @@ def _get_cached_run(run_id):
run_file = os.path.join(run_cache_dir,
"run_%d.xml" % int(run_id))
with io.open(run_file, encoding='utf8') as fh:
run = _create_task_from_xml(xml=fh.read())
run = _create_run_from_xml(xml=fh.read())
return run

except (OSError, IOError):
Expand Down
28 changes: 18 additions & 10 deletions openml/runs/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,16 @@ def _create_description_xml(self):

# as a tag, it must be of the form ([a-zA-Z0-9_\-\.])+
# so we format time from 'mm/dd/yy hh:mm:ss' to 'mm-dd-yy_hh.mm.ss'
well_formatted_time = time.strftime("%c").replace(
' ', '_').replace('/', '-').replace(':', '.')
tags = run_environment + [well_formatted_time] + ['run_task'] + \
[self.model.__module__ + "." + self.model.__class__.__name__]
# well_formatted_time = time.strftime("%c").replace(
# ' ', '_').replace('/', '-').replace(':', '.')
# tags = run_environment + [well_formatted_time] + ['run_task'] + \
# [self.model.__module__ + "." + self.model.__class__.__name__]
tags = ['openml-python', run_environment[1]]
description = _to_dict(taskid=self.task_id, flow_id=self.flow_id,
setup_string=_create_setup_string(self.model),
parameter_settings=openml_param_settings,
error_message=self.error_message,
detailed_evaluations=self.detailed_evaluations,
tags=tags)
description_xml = xmltodict.unparse(description, pretty=True)
return description_xml
Expand Down Expand Up @@ -247,7 +249,7 @@ def _get_version_information():
return [python_version, sklearn_version, numpy_version, scipy_version]


def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, tags):
def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, tags=None, detailed_evaluations=None):
""" Creates a dictionary corresponding to the desired xml desired by openML

Parameters
Expand All @@ -274,11 +276,17 @@ def _to_dict(taskid, flow_id, setup_string, error_message, parameter_settings, t
if error_message is not None:
description['oml:run']['oml:error_message'] = error_message
description['oml:run']['oml:parameter_setting'] = parameter_settings
description['oml:run']['oml:tag'] = tags # Tags describing the run
# description['oml:run']['oml:output_data'] = 0;
# all data that was output of this run, which can be evaluation scores
# (though those are also calculated serverside)
# must be of special data type
if tags is not None:
description['oml:run']['oml:tag'] = tags # Tags describing the run
if detailed_evaluations is not None:
description['oml:run']['oml:output_data'] = dict()
description['oml:run']['oml:output_data']['oml:evaluation'] = list()
for measure in detailed_evaluations:
for repeat in detailed_evaluations[measure]:
for fold, value in detailed_evaluations[measure][repeat].items():
current = OrderedDict([('@repeat', str(repeat)), ('@fold', str(fold)),
('oml:name', measure), ('oml:value', str(value))])
description['oml:run']['oml:output_data']['oml:evaluation'].append(current)
return description


Expand Down
10 changes: 10 additions & 0 deletions openml/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,15 @@ def is_string(obj):
except NameError:
return isinstance(obj, str)

def version_complies(major, minor=None):
    """Return True if the running Python interpreter is at least the given version.

    Parameters
    ----------
    major : int
        Required major version (e.g. ``3``).
    minor : int, optional
        Required minor version. When omitted, only the major version is
        compared (any minor of that major, or any newer major, complies).

    Returns
    -------
    bool
        True iff ``sys.version_info`` >= (major, minor or 0).
    """
    # Tuples compare lexicographically, which is exactly the
    # major-then-minor precedence the hand-rolled branches implemented.
    return sys.version_info[:2] >= (major, minor or 0)

__all__ = ['URLError', 'is_string']