Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-41606: Option to output pipetask report info to command line #277

Merged
merged 2 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/changes/DM-41606.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Add an option to print `pipetask report` information to the command line using
astropy tables, and make it the default.
Unpack a more human-readable dictionary from
`lsst.pipe.base.QuantumGraphExecutionReport.to_summary_dict` and print summary
tables of quanta and datasets to the command line. Save error messages and
associated data IDs to a YAML file in the working directory, or optionally print
them to the screen instead.
11 changes: 6 additions & 5 deletions python/lsst/ctrl/mpexec/cli/cmd/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,16 +329,17 @@ def update_graph_run(
@click.command(cls=PipetaskCommand)
@repo_argument()
@ctrlMpExecOpts.qgraph_argument()
@click.option("--full-output-filename", default="", help="Summarize report in a yaml file")
@click.option("--logs/--no-logs", default=True, help="Get butler log datasets for extra information.")
@click.option(
    "--show-errors",
    # Declared explicitly as a flag so that a bare ``--show-errors`` switches
    # it on; with only ``default=False`` the option would expect a value.
    is_flag=True,
    default=False,
    help="Pretty-print a dict of errors from failed quanta.",
)
def report(
    repo: str, qgraph: str, full_output_filename: str = "", logs: bool = True, show_errors: bool = False
) -> None:
    """Summarize the produced and missing expected datasets in a quantum
    graph.

    By default the summary is printed to the command line. Pass
    --full-output-filename to write the full summary to a yaml file instead.

    REPO is the location of the butler/registry config file.

    QGRAPH is the URL to a serialized Quantum Graph file.
    """
    # ``show_errors`` must be forwarded, otherwise the command-line option is
    # accepted but silently ignored by the script layer.
    script.report(repo, qgraph, full_output_filename, logs, show_errors)
88 changes: 70 additions & 18 deletions python/lsst/ctrl/mpexec/cli/script/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,34 +24,86 @@
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import pprint

import yaml
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switch the blank line order here. See PEP8 for import format.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use isort and black for formatting so I think this is right. pprint is part of core python and yaml is not so a line separates them.

from astropy.table import Table
from lsst.daf.butler import Butler
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably should be a blank line between the astropy and Butler imports too, if you're being a format stickler.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isort won't allow that.

from lsst.pipe.base import QuantumGraph
from lsst.pipe.base.execution_reports import QuantumGraphExecutionReport


def report(
    butler_config: str,
    qgraph_uri: str,
    full_output_filename: str | None,
    logs: bool = True,
    show_errors: bool = False,
) -> None:
    """Summarize the produced and missing expected datasets in a quantum
    graph.

    Parameters
    ----------
    butler_config : `str`
        The Butler used for this report. This should match the Butler used
        for the run associated with the executed quantum graph.
    qgraph_uri : `str`
        The uri of the location of said quantum graph.
    full_output_filename : `str` or `None`
        Name of a yaml file to receive the full summary report. Each data id
        and error message is keyed to a quantum graph node id, a convenient
        format for error-matching and cataloguing tools such as the ones in
        the Campaign Management database. If this is empty or `None`, the
        quanta and dataset summaries are printed to the command-line
        instead.
    logs : `bool`
        Get butler log datasets for extra information (error messages).
    show_errors : `bool`
        Only used when no output yaml is requested. If `True`, print the
        error messages to the command-line between the two summary tables.
        If `False`, the messages and their associated data ids are saved in
        the working directory as ``{run_name}_err.yaml``, where ``run_name``
        is the last ``/``-separated component of the graph's ``output_run``
        metadata.
    """
    butler = Butler.from_config(butler_config, writeable=False)
    qgraph = QuantumGraph.loadUri(qgraph_uri)
    execution_report = QuantumGraphExecutionReport.make_reports(butler, qgraph)

    # A file name was given: write the full yaml summary and stop.
    if full_output_filename:
        execution_report.write_summary_yaml(butler, full_output_filename, do_store_logs=logs)
        return

    # No file name: build the tables that are printed to the command-line.
    summary_dict = execution_report.to_summary_dict(butler, logs, human_readable=True)
    dataset_rows = []
    dataset_types = []
    quanta_rows = []
    error_summary = []
    for task, task_summary in summary_dict.items():
        for dataset_type, dataset_counts in task_summary["outputs"].items():
            dataset_rows.append(dataset_counts)
            dataset_types.append(dataset_type)

        quanta_rows.append(
            {
                "Task": task,
                "Failed Quanta": task_summary["failed_quanta"],
                "Blocked Quanta": task_summary["n_quanta_blocked"],
            }
        )

        if "errors" in task_summary:
            error_summary.append({task: task_summary["errors"]})

    quanta_table = Table(quanta_rows)
    dataset_table = Table(dataset_rows)
    # The dataset type names become the leading column of the dataset table.
    dataset_table.add_column(dataset_types, index=0, name="DatasetType")

    quanta_table.pprint_all()
    print("\n")
    if show_errors:
        pprint.pprint(error_summary)
        print("\n")
    else:
        assert qgraph.metadata is not None, "Saved QGs always have metadata."
        # Name the error file after the final component of the output run.
        run_name = str(qgraph.metadata["output_run"]).split("/")[-1]
        with open(f"{run_name}_err.yaml", "w") as stream:
            yaml.safe_dump(error_summary, stream)
    dataset_table.pprint_all()
22 changes: 19 additions & 3 deletions tests/test_cliCmdReport.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def tearDown(self) -> None:

def test_report(self):
"""Test for making a report on the produced and missing expected
datasets in a quantum graph. in a graph.
datasets in a quantum graph.
"""
metadata = {"output_run": "run"}
butler, qgraph = makeSimpleQGraph(
Expand All @@ -71,10 +71,9 @@ def test_report(self):

result = self.runner.invoke(
pipetask_cli,
["report", self.root, graph_uri, test_filename, "--no-logs"],
["report", self.root, graph_uri, "--full-output-filename", test_filename, "--no-logs"],
input="no",
)

# Check that we can read from the command line
self.assertEqual(result.exit_code, 0, clickResultMsg(result))

Expand All @@ -84,6 +83,23 @@ def test_report(self):
self.assertIsNotNone(report_output_dict["task0"])
self.assertIsNotNone(report_output_dict["task0"]["failed_quanta"])

result_hr = self.runner.invoke(
pipetask_cli,
["report", self.root, graph_uri, "--no-logs"],
input="no",
)

# Check that we can read from the command line
self.assertEqual(result_hr.exit_code, 0, clickResultMsg(result_hr))

# Check that we get string output
self.assertIsInstance(result_hr.stdout, str)

# Check that task0 and the failed quanta for task0 exist in the string
self.assertIn("task0", result_hr.stdout)
self.assertIn("Failed Quanta", result_hr.stdout)
self.assertIn("{'data_id': {'instrument': 'INSTR', 'detector': 0}}", result_hr.stdout)


if __name__ == "__main__":
unittest.main()
1 change: 1 addition & 0 deletions types.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
types-PyYAML