-
Notifications
You must be signed in to change notification settings - Fork 14
/
tracking.py
202 lines (156 loc) · 5.88 KB
/
tracking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# FUTURE
# TODO: clean up traceback
# TODO: partially track things if things fail. This might lead to some
# confusion, so we should add a flag to indicate whether the experiment was
# successful or not
import ast
import uuid
import click
import parso
import nbformat
from ploomber_engine._translator import translate_parameters
from IPython.core.interactiveshell import InteractiveShell
from ploomber_engine.ipython import PloomberClient, add_to_sys_path
from ploomber_engine.tracking.io import _process_content_data
from ploomber_engine._telemetry import telemetry
from ploomber_engine._util import find_cell_with_parameters_comment
try:
import jupytext
except ModuleNotFoundError:
jupytext = None
try:
import sklearn_evaluation
except ModuleNotFoundError:
sklearn_evaluation = None
# add support for extracting name from getitem operations
def extract_name(source):
    """Guess the name under which a cell's displayed output should be stored.

    Only the last line of ``source`` is inspected. If that line uses exactly
    one distinct name, that name is returned; otherwise the line is treated
    as a call (``function()`` or ``module.function()``) and the name of the
    called function is returned.

    Parameters
    ----------
    source : str
        Source code of a notebook cell.

    Returns
    -------
    str or None
        The extracted name, or None when ``source`` has no lines or no name
        can be determined.
    """
    lines = source.splitlines()

    # an empty source has no last line; indexing with [-1] would raise
    if not lines:
        return None

    mod = parso.parse(lines[-1])
    names = dict(mod.get_used_names())

    if len(names) == 1:
        return list(names)[0]

    try:
        return _get_function_name(mod)
    except Exception:
        # best effort: an unexpected tree shape means there is no usable name
        return None
def _get_function_name(mod):
leaf = mod.get_first_leaf()
children = leaf.parent.children
if len(children) == 2:
# simple case: function()
_, call = children
else:
# mod.name case: metrics.acuracy_score()
call = children[-1]
leaf = children[-2].children[1]
left = call.children[0].value
right = call.children[-1].value
if left == "(" and right == ")":
return leaf.value
def _safe_literal_eval(source):
try:
return ast.literal_eval(source)
except (SyntaxError, ValueError):
return source.strip()
class PloomberLogger(PloomberClient):
    """Notebook client that records displayed cell outputs in a tracker.

    After each code cell runs, the cell's last output is stored in the
    ``parameters`` dictionary under the name extracted from the cell's last
    source line. The dictionary is then written to the tracker.
    """

    def _execute(self, tracker, uuid_, parameters):
        """Run all code cells and store their named outputs.

        Parameters
        ----------
        tracker
            Object with an ``upsert(uuid_, parameters)`` method.
        uuid_ : str
            Identifier of the record to update.
        parameters : dict
            Updated in place with one entry per named output.
        """
        execution_count = 1

        # make sure that the current working directory is in the sys.path
        # in case the user has local modules
        with add_to_sys_path(self._cwd):
            for index, cell in enumerate(self._nb.cells):
                if cell.cell_type != "code":
                    continue

                self.execute_cell(
                    cell,
                    cell_index=index,
                    execution_count=execution_count,
                    store_history=False,
                )
                execution_count += 1

                if not cell["outputs"]:
                    continue

                # only the last output of the cell is considered
                out = _process_content_data(
                    cell["outputs"][-1], counter=None, idx=None
                )

                if not out:
                    continue

                name = extract_name(cell.source)

                if not name:
                    continue

                # out[0] is compared against a MIME type; plain text is
                # converted back to a Python object when possible
                if out[0] == "text/plain":
                    val = _safe_literal_eval(out[1])
                else:
                    val = out[1]

                parameters[name] = val

        # store the collected values once all cells have run
        tracker.upsert(uuid_, parameters)

    def execute(self, tracker, uuid_, parameters):
        """Execute the notebook

        The ``InteractiveShell`` instance that was active before the call
        (if any) is restored afterwards, even if a cell raises.
        """
        # FIXME: this logic is duplicated.
        # it's also on PloomberClient.execute
        original = InteractiveShell._instance

        try:
            with self:
                self._execute(tracker, uuid_, parameters)
        finally:
            # run the restoration in ``finally`` so a failing cell does not
            # leave the caller's session without its original shell
            if original is not None:
                # restore original instance
                InteractiveShell._instance = original

                # restore inline matplotlib
                try:
                    from matplotlib_inline.backend_inline import (
                        configure_inline_support,
                    )
                except ModuleNotFoundError:
                    pass
                else:
                    configure_inline_support(original, "inline")
                    original.run_line_magic("matplotlib", "inline")
@click.command()
@click.argument("filename", type=click.Path(exists=True))
@click.option("-d", "--database", default="experiments.db")
@click.option("-p", "--parameters")
@click.option("-q", "--quiet", is_flag=True)
def _cli(filename, database, parameters, quiet):
    # Command-line entry point: parses the -p/--parameters string and hands
    # everything over to track_execution.
    # NOTE: documented with comments on purpose; click shows a command's
    # docstring as its --help text
    parsed = _parse_cli_parameters(parameters)
    return track_execution(
        filename, parameters=parsed, database=database, quiet=quiet
    )
def _parse_param(value):
    """Convert the text of a CLI parameter value into a Python object.

    Parameters
    ----------
    value : str
        Text that appeared after the ``=`` in a ``key=value`` pair.

    Returns
    -------
    object
        - a bare name (e.g., ``hello``) is returned as a string
        - text that is a Python literal (``1``, ``1.5``, ``'text'``, ``-1``)
          is returned as the evaluated literal
        - anything else is returned as a string
    """
    exp = parso.parse(value).children[0]

    if exp.type == "name":
        return exp.value

    if hasattr(exp, "value"):
        # single token (number, string, keyword, ...)
        code = exp.value
        fallback = value.strip()
    else:
        # compound expression: a signed number such as -1 is parsed as a
        # node, not as a number token, so it has to be evaluated from its code
        code = fallback = exp.get_code()

    try:
        return ast.literal_eval(code.strip())
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        # not a literal (file paths, dotted names, empty values, ...)
        return fallback
def _parse_cli_parameters(parameters):
if parameters is None:
return {}
pairs = [pair.strip().split("=") for pair in parameters.split(",")]
return {k: _parse_param(v) for k, v in pairs}
@telemetry.log_call("track-execution")
def track_execution(filename, parameters=None, database="experiments.db", quiet=False):
    """
    Execute a script or notebook and write outputs to a SQLite database

    Parameters
    ----------
    filename : str
        Path to the script or notebook; it is read with ``jupytext.read``.
    parameters : dict, optional
        Parameters to inject as a new code cell. The cell is placed right
        after the cell with the ``# parameters`` comment, or at the top if
        there is no such cell. The dictionary is not modified.
    database : str, default="experiments.db"
        Passed to ``sklearn_evaluation.SQLiteTracker``.
    quiet : bool, default=False
        If True, the progress messages are not printed and the client is
        created with ``display_stdout=False``.

    Raises
    ------
    click.ClickException
        If jupytext or sklearn-evaluation is not installed.
    """
    if jupytext is None:
        raise click.ClickException("Missing jupytext: pip install jupytext")

    if sklearn_evaluation is None:
        raise click.ClickException(
            "Missing sklearn-evaluation: pip install sklearn-evaluation"
        )

    # work on a copy: the logger adds the captured outputs to this dict,
    # which would otherwise modify the object the caller passed in
    parameters = dict(parameters or {})

    nb = jupytext.read(filename)

    _, idx = find_cell_with_parameters_comment(nb)

    if idx is None:
        click.echo("Could not find block with the # parameters comment")
        idx_injected_params = 0
    else:
        idx_injected_params = idx + 1

    if not quiet:
        click.echo(f"Parameters: {parameters}")

    params = translate_parameters(parameters, comment="User parameters")
    params_cell = nbformat.v4.new_code_cell(source=params)
    nb.cells.insert(idx_injected_params, params_cell)

    logger = PloomberLogger(nb, display_stdout=not quiet)

    if not quiet:
        click.echo("Running...")

    tracker = sklearn_evaluation.SQLiteTracker(database)

    # the first 8 characters of a random UUID identify this run
    uuid_ = str(uuid.uuid4())[:8]

    # create the record with the input parameters before running anything
    tracker.insert(uuid_, parameters)
    logger.execute(tracker=tracker, uuid_=uuid_, parameters=parameters)