Skip to content

Commit

Permalink
MAINT minor changes to code
Browse files Browse the repository at this point in the history
  • Loading branch information
mfeurer committed Feb 1, 2017
1 parent 90a88cc commit a972eeb
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 34 deletions.
2 changes: 1 addition & 1 deletion openml/flows/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .flow import OpenMLFlow
from .sklearn_converter import sklearn_to_flow, flow_to_sklearn
from .functions import get_flow, get_flow_dict
from .functions import get_flow

# Public names exported by ``from openml.flows import *``. Every entry must
# be bound in this module by the imports above; a name that is listed here
# but never imported makes the star-import raise AttributeError.
__all__ = ['OpenMLFlow', 'get_flow', 'sklearn_to_flow', 'flow_to_sklearn']
18 changes: 1 addition & 17 deletions openml/flows/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,4 @@ def get_flow(flow_id):
if 'sklearn' in flow.external_version:
flow.model = flow_to_sklearn(flow)

return flow


def get_flow_dict(flow):
    """Return a dictionary mapping flow names to flow ids.

    The mapping contains ``flow`` itself and, recursively, every sub-flow
    stored in ``flow.components``.

    Parameters
    ----------
    flow : OpenMLFlow
        Flow whose ``flow_id`` is set (per the error message below, a flow
        that was downloaded from the server).

    Returns
    -------
    dict
        ``{flow.name: flow.flow_id}`` for the flow and all of its components.

    Raises
    ------
    PyOpenMLError
        If ``flow`` or any of its (nested) components has ``flow_id`` None.
    """
    if flow.flow_id is None:
        raise PyOpenMLError(
            "Can only invoke function 'get_flow_dict' on a server downloaded flow. ")
    flow_map = {flow.name: flow.flow_id}
    # ``components`` maps an identifier to a sub-flow; only the sub-flows
    # themselves are needed to build the name -> id mapping.
    for subflow in flow.components.values():
        flow_map.update(get_flow_dict(subflow))

    return flow_map
return flow
27 changes: 16 additions & 11 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,8 @@ def _run_task_get_arffcontent(model, task, class_labels):

model.fit(trainX, trainY)
if isinstance(model, BaseSearchCV):
for itt_no in range(0, len(model.cv_results_['mean_test_score'])):
# we use the string values for True and False, as it is defined in this way by the OpenML server
selected = 'false'
if itt_no == model.best_index_:
selected = 'true'
test_score = model.cv_results_['mean_test_score'][itt_no]
arff_line = [rep_no, fold_no, itt_no, test_score, selected]
for key in model.cv_results_:
if key.startswith("param_"):
arff_line.append(str(model.cv_results_[key][itt_no]))
arff_tracecontent.append(arff_line)
_add_results_to_arfftrace(arff_tracecontent, fold_no, model,
rep_no)

ProbaY = model.predict_proba(testX)
PredY = model.predict(testX)
Expand All @@ -113,6 +104,20 @@ def _run_task_get_arffcontent(model, task, class_labels):
return arff_datacontent, arff_tracecontent


def _add_results_to_arfftrace(arff_tracecontent, fold_no, model, rep_no):
for itt_no in range(0, len(model.cv_results_['mean_test_score'])):
# we use the string values for True and False, as it is defined in this way by the OpenML server
selected = 'false'
if itt_no == model.best_index_:
selected = 'true'
test_score = model.cv_results_['mean_test_score'][itt_no]
arff_line = [rep_no, fold_no, itt_no, test_score, selected]
for key in model.cv_results_:
if key.startswith("param_"):
arff_line.append(str(model.cv_results_[key][itt_no]))
arff_tracecontent.append(arff_line)


def get_runs(run_ids):
"""Gets all runs in run_ids list.
Expand Down
16 changes: 11 additions & 5 deletions openml/runs/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ def _generate_trace_arff_dict(self, model):
type = 'NUMERIC'
else:
values = list(set(model.cv_results_[key])) # unique values
if len(values) < 100: # arbitrary number. make it an option?
type = [str(i) for i in values]
type = [str(i) for i in values]
print(key + ": " + str(type))

attribute = ("parameter_" + key[6:], type)
Expand Down Expand Up @@ -179,19 +178,26 @@ def _create_description_xml(self):
return description_xml

def _parse_parameters(model, flow):
"""Extracts all parameter settings from an model in OpenML format.
"""Extracts all parameter settings from a model in OpenML format.
Parameters
----------
model
the sci-kit learn model (fitted)
the scikit-learn model (fitted)
flow
openml flow object (containing flow ids, i.e., it has to be downloaded from the server)
"""
python_param_settings = model.get_params()
openml_param_settings = []
flow_dict = openml.flows.get_flow_dict(flow)

def get_flow_dict(_flow):
flow_map = {_flow.name: _flow.flow_id}
for subflow in _flow.components:
flow_map.update(get_flow_dict(_flow.components[subflow]))
return flow_map

flow_dict = get_flow_dict(flow)

for param in python_param_settings:
if "__" in param:
Expand Down

0 comments on commit a972eeb

Please sign in to comment.