From 00df3e7f5a972f548263d2a5135ed10b861a9112 Mon Sep 17 00:00:00 2001 From: pawel Date: Mon, 30 Mar 2020 12:35:05 +0200 Subject: [PATCH 01/15] init --- dvc/cli.py | 2 + dvc/command/viz.py | 27 +++++++++ dvc/repo/__init__.py | 1 + dvc/repo/init.py | 3 + dvc/repo/viz.py | 18 ++++++ dvc/visualization.py | 141 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 192 insertions(+) create mode 100644 dvc/command/viz.py create mode 100644 dvc/repo/viz.py create mode 100644 dvc/visualization.py diff --git a/dvc/cli.py b/dvc/cli.py index 17d43a974b..24cca580b8 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -3,6 +3,7 @@ import logging import sys +from dvc.command import viz from .command import ( add, cache, @@ -74,6 +75,7 @@ version, update, git_hook, + viz, ] diff --git a/dvc/command/viz.py b/dvc/command/viz.py new file mode 100644 index 0000000000..3d4a664d98 --- /dev/null +++ b/dvc/command/viz.py @@ -0,0 +1,27 @@ +import argparse +import logging + +from dvc.command.base import append_doc_link, CmdBase + +logger = logging.getLogger(__name__) + + +class CmdViz(CmdBase): + def run(self): + self.repo.viz(self.args.targets) + + +def add_parser(subparsers, parent_parser): + VIZ_HELP = "Visualize target metric file using vega.io" + + viz_parser = subparsers.add_parser( + "viz", + parents=[parent_parser], + description=append_doc_link(VIZ_HELP, "viz"), + help=VIZ_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + viz_parser.add_argument( + "targets", nargs="+", help="Metrics file to visualize." + ) + viz_parser.set_defaults(func=CmdViz) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index e7f2b4dfd3..ad135d85e3 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -59,6 +59,7 @@ class Repo(object): from dvc.repo.get import get from dvc.repo.get_url import get_url from dvc.repo.update import update + from dvc.repo.viz import viz def __init__(self, root_dir=None): from dvc.state import State diff --git a/dvc/repo/init.py b/dvc/repo/init.py index 3238bb8e94..ef51c3fb88 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -12,6 +12,7 @@ from dvc.utils import boxify from dvc.utils import relpath from dvc.utils.fs import remove +from dvc.visualization import VisualizationTemplates logger = logging.getLogger(__name__) @@ -100,6 +101,8 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): with config.edit() as conf: conf["core"]["no_scm"] = True + VisualizationTemplates.init(dvc_dir) + proj = Repo(root_dir) scm.add([config.files["repo"]]) diff --git a/dvc/repo/viz.py b/dvc/repo/viz.py new file mode 100644 index 0000000000..f5c0b9853f --- /dev/null +++ b/dvc/repo/viz.py @@ -0,0 +1,18 @@ +import logging + +from dvc.visualization import Default1DArrayTemplate + +logger = logging.getLogger(__name__) + + +def viz(repo, targets, typ="csv", viz_template=None): + if typ == "csv": + import csv + + with open(targets[0], "r") as fd: + rdr = csv.reader(fd, delimiter=",") + lines = list(rdr) + assert len(lines) == 1 + values = lines[0] + + Default1DArrayTemplate(repo.dvc_dir).save(values) diff --git a/dvc/visualization.py b/dvc/visualization.py new file mode 100644 index 0000000000..4cd873e987 --- /dev/null +++ b/dvc/visualization.py @@ -0,0 +1,141 @@ +import json +import logging +import os +import tempfile + +from dvc.utils.fs import makedirs + + +logger = logging.getLogger(__name__) + + +class AbstractTemplate: + HTML_TEMPLATE = """ + + + Embedding Vega-Lite + + + + + +
+ + + +""" + + TEMPLATES_DIR = "visualisation" + INDENT = 4 + SEPARATORS = (",", ": ") + + def __init__(self, dvc_dir): + self.dvc_dir = dvc_dir + self.visualization_dir = os.path.join(dvc_dir, self.TEMPLATES_DIR) + + def dump(self): + import json + + makedirs(self.visualization_dir, exist_ok=True) + + with open( + os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "w+" + ) as fd: + json.dump( + self.DEFAULT_CONTENT, + fd, + indent=self.INDENT, + separators=self.SEPARATORS, + ) + + def load(self): + import json + + with open( + os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "r" + ) as fd: + return json.load(fd) + + def fill(self, data): + raise NotImplementedError + + def save(self, data): + + vega_json = self.fill(data) + + tmp_dir = tempfile.mkdtemp("dvc-viz") + path = os.path.join(tmp_dir, "vis.html") + with open(path, "w") as fd: + fd.write(self.HTML_TEMPLATE.format(vega_json=vega_json)) + + logger.error("PATH: {}".format(path)) + + +class Default1DArrayTemplate(AbstractTemplate): + def fill(self, data): + assert isinstance(data, list) + assert not isinstance(data[0], list) + with open( + os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "r" + ) as fd: + content = json.load(fd) + + data_entry_template = '{{"x":{},"y":{}}},' + to_inject = "[" + for index, v in enumerate(data): + to_inject += data_entry_template.format(index, v) + to_inject += "]" + + content["data"][0]["values"] = to_inject + return str(content) + + TEMPLATE_NAME = "default_1d_array.json" + DEFAULT_CONTENT = { + "$schema": "https://vega.github.io/schema/vega/v5.json", + "width": 500, + "height": 500, + "data": [{"name": "table", "values": ""}], + "scales": [ + { + "name": "x", + "type": "point", + "range": "width", + "domain": {"data": "table", "field": "x"}, + }, + { + "name": "y", + "type": "linear", + "range": "height", + "domain": {"data": "table", "field": "y"}, + }, + ], + "axes": [ + {"orient": "bottom", "scale": "x"}, + {"orient": "left", "scale": "y"}, + ], + "marks": [ + { + "type": "line", + "from": {"data": "table"}, + "encode": { + "enter": { + "x": {"scale": "x", "field": "x"}, + "y": {"scale": "y", "field": "y"}, + "strokeWidth": {"value": 3}, + } + }, + } + ], + } + + +TEMPLATES = [Default1DArrayTemplate] + + +class VisualizationTemplates: + @staticmethod + def init(dvc_dir): + [t(dvc_dir).dump() for t in TEMPLATES] From 4747075653fea32faff15e93b5d48d517879bf9d Mon Sep 17 00:00:00 2001 From: pawel Date: Tue, 31 Mar 2020 15:38:33 +0200 Subject: [PATCH 02/15] rename to plot data insertion basig on dicts update --- dvc/cli.py | 4 +- dvc/command/{viz.py => plot.py} | 18 ++-- dvc/plot.py | 104 +++++++++++++++++++++++ dvc/repo/__init__.py | 2 +- dvc/repo/init.py | 4 +- dvc/repo/plot.py | 35 ++++++++ dvc/repo/viz.py | 18 ---- dvc/visualization.py | 141 -------------------------------- 8 files changed, 153 insertions(+), 173 deletions(-) rename dvc/command/{viz.py => plot.py} (51%) create mode 100644 dvc/plot.py create mode 100644 dvc/repo/plot.py delete mode 100644 dvc/repo/viz.py delete mode 100644 dvc/visualization.py diff --git a/dvc/cli.py b/dvc/cli.py index 24cca580b8..07e51b5d6c 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -3,7 +3,7 @@ import logging import sys -from dvc.command import viz +from dvc.command import plot from .command import ( add, cache, @@ -75,7 +75,7 @@ version, update, git_hook, - viz, + plot, ] diff --git a/dvc/command/viz.py b/dvc/command/plot.py similarity index 51% rename from dvc/command/viz.py rename to dvc/command/plot.py index 3d4a664d98..ddb155d465 100644 --- a/dvc/command/viz.py +++ b/dvc/command/plot.py @@ -6,22 +6,22 @@ logger = logging.getLogger(__name__) -class CmdViz(CmdBase): +class CmdPlot(CmdBase): def run(self): - self.repo.viz(self.args.targets) + self.repo.plot(self.args.targets) def add_parser(subparsers, parent_parser): - VIZ_HELP = "Visualize target metric file using vega.io" + PLOT_HELP = "Visualize target metric file using vega.io" - viz_parser = subparsers.add_parser( - "viz", + plot_parser = subparsers.add_parser( + "plot", parents=[parent_parser], - description=append_doc_link(VIZ_HELP, "viz"), - help=VIZ_HELP, + description=append_doc_link(PLOT_HELP, "plot"), + help=PLOT_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) - viz_parser.add_argument( + plot_parser.add_argument( "targets", nargs="+", help="Metrics file to visualize." ) - viz_parser.set_defaults(func=CmdViz) + plot_parser.set_defaults(func=CmdPlot) diff --git a/dvc/plot.py b/dvc/plot.py new file mode 100644 index 0000000000..630314edcf --- /dev/null +++ b/dvc/plot.py @@ -0,0 +1,104 @@ +import json +import logging +import os + +from dvc.utils.fs import makedirs + + +logger = logging.getLogger(__name__) + + +class AbstractTemplate: + HTML_TEMPLATE = """ + + + Embedding Vega-Lite + + + + + +
+ + + +""" + + TEMPLATES_DIR = "plot" + INDENT = 4 + SEPARATORS = (",", ": ") + + def __init__(self, dvc_dir): + self.dvc_dir = dvc_dir + self.plot_templates_dir = os.path.join(dvc_dir, self.TEMPLATES_DIR) + + def dump(self): + import json + + makedirs(self.plot_templates_dir, exist_ok=True) + + with open( + os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "w+" + ) as fd: + json.dump( + self.DEFAULT_CONTENT, + fd, + indent=self.INDENT, + separators=self.SEPARATORS, + ) + + def load(self): + import json + + with open( + os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" + ) as fd: + return json.load(fd) + + def fill(self, data): + raise NotImplementedError + + def save(self, update_dict, path): + vega_dict = self.fill(update_dict) + + with open(path, "w") as fd: + fd.write(self.HTML_TEMPLATE.format(vega_json=vega_dict)) + + logger.error("PATH: {}".format(path)) + + +class DefaultTemplate(AbstractTemplate): + TEMPLATE_NAME = "default.json" + + DEFAULT_CONTENT = { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "title": "", + "data": {"values": []}, + "mark": {"type": "line"}, + "encoding": { + "x": {"field": "x", "type": "quantitative"}, + "y": {"field": "y", "type": "quantitative"}, + "color": {"field": "revision", "type": "nominal"}, + }, + } + + def fill(self, update_dict): + with open( + os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" + ) as fd: + vega_spec = json.load(fd) + + vega_spec.update(update_dict) + return vega_spec + + +TEMPLATES = [DefaultTemplate] + + +class PlotTemplates: + @staticmethod + def init(dvc_dir): + [t(dvc_dir).dump() for t in TEMPLATES] diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index ad135d85e3..9da9fdae7f 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -59,7 +59,7 @@ class Repo(object): from dvc.repo.get import get from dvc.repo.get_url import get_url from dvc.repo.update import update - from dvc.repo.viz import viz + from dvc.repo.plot import plot def __init__(self, root_dir=None): from dvc.state import State diff --git a/dvc/repo/init.py b/dvc/repo/init.py index ef51c3fb88..d8e5691e24 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -12,7 +12,7 @@ from dvc.utils import boxify from dvc.utils import relpath from dvc.utils.fs import remove -from dvc.visualization import VisualizationTemplates +from dvc.plot import PlotTemplates logger = logging.getLogger(__name__) @@ -101,7 +101,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): with config.edit() as conf: conf["core"]["no_scm"] = True - VisualizationTemplates.init(dvc_dir) + PlotTemplates.init(dvc_dir) proj = Repo(root_dir) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py new file mode 100644 index 0000000000..1e008d6ce1 --- /dev/null +++ b/dvc/repo/plot.py @@ -0,0 +1,35 @@ +import logging +import os + +from dvc.plot import DefaultTemplate + +logger = logging.getLogger(__name__) + + +def create_data_dict(target, typ): + result = {} + if typ == "json": + import json + + with open(target, "r+") as fd: + data = json.load(fd) + for d in data: + d["revision"] = "HEAD" + + result["data"] = {} + result["data"]["values"] = data + result["title"] = target + return result + + +def plot(repo, targets, typ="json"): + # TODO how to handle multiple targets + target = targets[0] + vega_data_dict = create_data_dict(target, typ) + + # TODO need to pass title, probably need a way to pass additional config + # from json file + + DefaultTemplate(repo.dvc_dir).save( + vega_data_dict, os.path.join(repo.root_dir, "viz.html") + ) diff --git a/dvc/repo/viz.py b/dvc/repo/viz.py deleted file mode 100644 index f5c0b9853f..0000000000 --- a/dvc/repo/viz.py +++ /dev/null @@ -1,18 +0,0 @@ -import logging - -from dvc.visualization import Default1DArrayTemplate - -logger = logging.getLogger(__name__) - - -def viz(repo, targets, typ="csv", viz_template=None): - if typ == "csv": - import csv - - with open(targets[0], "r") as fd: - rdr = csv.reader(fd, delimiter=",") - lines = list(rdr) - assert len(lines) == 1 - values = lines[0] - - Default1DArrayTemplate(repo.dvc_dir).save(values) diff --git a/dvc/visualization.py b/dvc/visualization.py deleted file mode 100644 index 4cd873e987..0000000000 --- a/dvc/visualization.py +++ /dev/null @@ -1,141 +0,0 @@ -import json -import logging -import os -import tempfile - -from dvc.utils.fs import makedirs - - -logger = logging.getLogger(__name__) - - -class AbstractTemplate: - HTML_TEMPLATE = """ - - - Embedding Vega-Lite - - - - - -
- - - -""" - - TEMPLATES_DIR = "visualisation" - INDENT = 4 - SEPARATORS = (",", ": ") - - def __init__(self, dvc_dir): - self.dvc_dir = dvc_dir - self.visualization_dir = os.path.join(dvc_dir, self.TEMPLATES_DIR) - - def dump(self): - import json - - makedirs(self.visualization_dir, exist_ok=True) - - with open( - os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "w+" - ) as fd: - json.dump( - self.DEFAULT_CONTENT, - fd, - indent=self.INDENT, - separators=self.SEPARATORS, - ) - - def load(self): - import json - - with open( - os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "r" - ) as fd: - return json.load(fd) - - def fill(self, data): - raise NotImplementedError - - def save(self, data): - - vega_json = self.fill(data) - - tmp_dir = tempfile.mkdtemp("dvc-viz") - path = os.path.join(tmp_dir, "vis.html") - with open(path, "w") as fd: - fd.write(self.HTML_TEMPLATE.format(vega_json=vega_json)) - - logger.error("PATH: {}".format(path)) - - -class Default1DArrayTemplate(AbstractTemplate): - def fill(self, data): - assert isinstance(data, list) - assert not isinstance(data[0], list) - with open( - os.path.join(self.visualization_dir, self.TEMPLATE_NAME), "r" - ) as fd: - content = json.load(fd) - - data_entry_template = '{{"x":{},"y":{}}},' - to_inject = "[" - for index, v in enumerate(data): - to_inject += data_entry_template.format(index, v) - to_inject += "]" - - content["data"][0]["values"] = to_inject - return str(content) - - TEMPLATE_NAME = "default_1d_array.json" - DEFAULT_CONTENT = { - "$schema": "https://vega.github.io/schema/vega/v5.json", - "width": 500, - "height": 500, - "data": [{"name": "table", "values": ""}], - "scales": [ - { - "name": "x", - "type": "point", - "range": "width", - "domain": {"data": "table", "field": "x"}, - }, - { - "name": "y", - "type": "linear", - "range": "height", - "domain": {"data": "table", "field": "y"}, - }, - ], - "axes": [ - {"orient": "bottom", "scale": "x"}, - {"orient": "left", "scale": "y"}, - ], - "marks": [ - { - "type": "line", - "from": {"data": "table"}, - "encode": { - "enter": { - "x": {"scale": "x", "field": "x"}, - "y": {"scale": "y", "field": "y"}, - "strokeWidth": {"value": 3}, - } - }, - } - ], - } - - -TEMPLATES = [Default1DArrayTemplate] - - -class VisualizationTemplates: - @staticmethod - def init(dvc_dir): - [t(dvc_dir).dump() for t in TEMPLATES] From f0434f553f5c941e7c64c5c8de4013c5f2d1b316 Mon Sep 17 00:00:00 2001 From: pawel Date: Tue, 31 Mar 2020 19:29:38 +0200 Subject: [PATCH 03/15] revision support --- dvc/command/plot.py | 12 ++++++- dvc/plot.py | 27 +++----------- dvc/repo/plot.py | 86 +++++++++++++++++++++++++++++++++++++-------- 3 files changed, 87 insertions(+), 38 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index ddb155d465..5a90979ea9 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -8,7 +8,7 @@ class CmdPlot(CmdBase): def run(self): - self.repo.plot(self.args.targets) + self.repo.plot(self.args.targets, self.args.a_rev, self.args.b_rev) def add_parser(subparsers, parent_parser): @@ -21,6 +21,16 @@ def add_parser(subparsers, parent_parser): help=PLOT_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) + plot_parser.add_argument( + "--a_rev", + help="Old Git commit to compare (defaults to HEAD)", + nargs="?", + ) + plot_parser.add_argument( + "--b_rev", + help=("New Git commit to compare (defaults to the current workspace)"), + nargs="?", + ) plot_parser.add_argument( "targets", nargs="+", help="Metrics file to visualize." ) diff --git a/dvc/plot.py b/dvc/plot.py index 630314edcf..8274e6dbcb 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -9,23 +9,6 @@ class AbstractTemplate: - HTML_TEMPLATE = """ - - - Embedding Vega-Lite - - - - - -
- - - -""" TEMPLATES_DIR = "plot" INDENT = 4 @@ -61,13 +44,13 @@ def load(self): def fill(self, data): raise NotImplementedError - def save(self, update_dict, path): - vega_dict = self.fill(update_dict) + # def save(self, update_dict, path): + # vega_dict = self.fill(update_dict) - with open(path, "w") as fd: - fd.write(self.HTML_TEMPLATE.format(vega_json=vega_dict)) + # with open(path, "w") as fd: + # fd.write(self.HTML_TEMPLATE.format(vega_json=vega_dict)) - logger.error("PATH: {}".format(path)) + # logger.error("PATH: {}".format(path)) class DefaultTemplate(AbstractTemplate): diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 1e008d6ce1..9931dd832a 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,20 +1,75 @@ +import json import logging -import os from dvc.plot import DefaultTemplate logger = logging.getLogger(__name__) -def create_data_dict(target, typ): +class PageTemplate: + HTML = """ + + + + Embedding Vega-Lite + + + + + + {divs} + + """ + + @staticmethod + def save(divs, path): + page = PageTemplate.HTML.format(divs="\n".join(divs)) + with open(path, "w") as fd: + fd.write(page) + + +class DivTemplate: + HTML = """ +
+ """ + + @staticmethod + def prepare_div(vega_dict): + from shortuuid import uuid + + id = uuid() + return DivTemplate.HTML.format( + id=str(id), + vega_dict=json.dumps(vega_dict, indent=4, separators=(",", ": ")), + ) + + +def _load(tree, target, revision="curren workspace"): + with tree.open(target, "r") as fd: + data = json.load(fd) + for d in data: + d["revision"] = revision + return data + + +def create_data_dict(repo, target, typ, a_rev=None, b_rev=None): result = {} + data = [] if typ == "json": - import json - with open(target, "r+") as fd: - data = json.load(fd) - for d in data: - d["revision"] = "HEAD" + if a_rev and b_rev: + logger.error("diff") + a_tree = repo.scm.get_tree(a_rev) + b_tree = repo.scm.get_tree(b_rev) + logger.error((a_tree, b_tree)) + data.extend(_load(a_tree, target, a_rev)) + data.extend(_load(b_tree, target, b_rev)) + else: + logger.error(str(repo.tree.tree)) + data.extend(_load(repo.tree, target)) result["data"] = {} result["data"]["values"] = data @@ -22,14 +77,15 @@ def create_data_dict(target, typ): return result -def plot(repo, targets, typ="json"): +def plot(repo, targets, a_rev=None, b_rev=None, typ="json"): # TODO how to handle multiple targets - target = targets[0] - vega_data_dict = create_data_dict(target, typ) + logger.error((a_rev, b_rev)) + divs = [] + for target in targets: + vega_data_dict = create_data_dict(repo, target, typ, a_rev, b_rev) - # TODO need to pass title, probably need a way to pass additional config - # from json file + # TODO need to pass title, probably need a way to pass additional conf - DefaultTemplate(repo.dvc_dir).save( - vega_data_dict, os.path.join(repo.root_dir, "viz.html") - ) + vega_dict = DefaultTemplate(repo.dvc_dir).fill(vega_data_dict) + divs.append(DivTemplate.prepare_div(vega_dict)) + PageTemplate.save(divs, "viz.html") From 4235d29f113ba3bce55408298f2676c67c8825ed Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 11:50:53 +0200 Subject: [PATCH 04/15] roll back revision --- dvc/cli.py | 2 +- dvc/command/plot.py | 19 ++++-------- dvc/plot.py | 35 ++++++--------------- dvc/repo/init.py | 4 +-- dvc/repo/plot.py | 67 ++++++++++++----------------------------- setup.py | 1 + tests/func/test_plot.py | 37 +++++++++++++++++++++++ 7 files changed, 75 insertions(+), 90 deletions(-) create mode 100644 tests/func/test_plot.py diff --git a/dvc/cli.py b/dvc/cli.py index 07e51b5d6c..6a8f857faa 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -3,7 +3,6 @@ import logging import sys -from dvc.command import plot from .command import ( add, cache, @@ -36,6 +35,7 @@ update, version, git_hook, + plot, ) from .command.base import fix_subparsers from .exceptions import DvcParserError diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 5a90979ea9..32bfd4c941 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -2,17 +2,20 @@ import logging from dvc.command.base import append_doc_link, CmdBase +from dvc.utils import format_link logger = logging.getLogger(__name__) class CmdPlot(CmdBase): def run(self): - self.repo.plot(self.args.targets, self.args.a_rev, self.args.b_rev) + self.repo.plot(self.args.targets) def add_parser(subparsers, parent_parser): - PLOT_HELP = "Visualize target metric file using vega.io" + PLOT_HELP = "Visualize target metric file using {}.".format( + format_link("https://vega.github.io") + ) plot_parser = subparsers.add_parser( "plot", @@ -22,16 +25,6 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) plot_parser.add_argument( - "--a_rev", - help="Old Git commit to compare (defaults to HEAD)", - nargs="?", - ) - plot_parser.add_argument( - "--b_rev", - help=("New Git commit to compare (defaults to the current workspace)"), - nargs="?", - ) - plot_parser.add_argument( - "targets", nargs="+", help="Metrics file to visualize." + "targets", nargs="+", help="Metric files to visualize." ) plot_parser.set_defaults(func=CmdPlot) diff --git a/dvc/plot.py b/dvc/plot.py index 8274e6dbcb..373840397f 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -1,7 +1,6 @@ import json import logging import os - from dvc.utils.fs import makedirs @@ -9,7 +8,6 @@ class AbstractTemplate: - TEMPLATES_DIR = "plot" INDENT = 4 SEPARATORS = (",", ": ") @@ -33,25 +31,9 @@ def dump(self): separators=self.SEPARATORS, ) - def load(self): - import json - - with open( - os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" - ) as fd: - return json.load(fd) - def fill(self, data): raise NotImplementedError - # def save(self, update_dict, path): - # vega_dict = self.fill(update_dict) - - # with open(path, "w") as fd: - # fd.write(self.HTML_TEMPLATE.format(vega_json=vega_dict)) - - # logger.error("PATH: {}".format(path)) - class DefaultTemplate(AbstractTemplate): TEMPLATE_NAME = "default.json" @@ -68,7 +50,12 @@ class DefaultTemplate(AbstractTemplate): }, } - def fill(self, update_dict): + def fill(self, data): + assert isinstance(data, list) + assert all({"x", "y", "revision"} == set(d.keys()) for d in data) + + update_dict = {"data": {"values": data}} + with open( os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" ) as fd: @@ -78,10 +65,6 @@ def fill(self, update_dict): return vega_spec -TEMPLATES = [DefaultTemplate] - - -class PlotTemplates: - @staticmethod - def init(dvc_dir): - [t(dvc_dir).dump() for t in TEMPLATES] +def init_plot_templates(dvc_dir): + templates = [DefaultTemplate] + [t(dvc_dir).dump() for t in templates] diff --git a/dvc/repo/init.py b/dvc/repo/init.py index d8e5691e24..71f9f0d634 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -6,13 +6,13 @@ from dvc import analytics from dvc.config import Config from dvc.exceptions import InitError, InvalidArgumentError +from dvc.plot import init_plot_templates from dvc.repo import Repo from dvc.scm import SCM from dvc.scm.base import SCMError from dvc.utils import boxify from dvc.utils import relpath from dvc.utils.fs import remove -from dvc.plot import PlotTemplates logger = logging.getLogger(__name__) @@ -101,7 +101,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): with config.edit() as conf: conf["core"]["no_scm"] = True - PlotTemplates.init(dvc_dir) + init_plot_templates(dvc_dir) proj = Repo(root_dir) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 9931dd832a..dd444ba78b 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -7,19 +7,17 @@ class PageTemplate: - HTML = """ - - - - Embedding Vega-Lite - - - - - - {divs} - - """ + HTML = """ + + dvc plot + + + + + + {divs} + +""" @staticmethod def save(divs, path): @@ -32,7 +30,7 @@ class DivTemplate: HTML = """
""" @@ -43,11 +41,11 @@ def prepare_div(vega_dict): id = uuid() return DivTemplate.HTML.format( id=str(id), - vega_dict=json.dumps(vega_dict, indent=4, separators=(",", ": ")), + vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), ) -def _load(tree, target, revision="curren workspace"): +def _load(tree, target, revision="current workspace"): with tree.open(target, "r") as fd: data = json.load(fd) for d in data: @@ -55,37 +53,10 @@ def _load(tree, target, revision="curren workspace"): return data -def create_data_dict(repo, target, typ, a_rev=None, b_rev=None): - result = {} - data = [] - if typ == "json": - - if a_rev and b_rev: - logger.error("diff") - a_tree = repo.scm.get_tree(a_rev) - b_tree = repo.scm.get_tree(b_rev) - logger.error((a_tree, b_tree)) - data.extend(_load(a_tree, target, a_rev)) - data.extend(_load(b_tree, target, b_rev)) - else: - logger.error(str(repo.tree.tree)) - data.extend(_load(repo.tree, target)) - - result["data"] = {} - result["data"]["values"] = data - result["title"] = target - return result - - -def plot(repo, targets, a_rev=None, b_rev=None, typ="json"): - # TODO how to handle multiple targets - logger.error((a_rev, b_rev)) +def plot(repo, targets, plot_filename="plot.html", typ="json"): divs = [] for target in targets: - vega_data_dict = create_data_dict(repo, target, typ, a_rev, b_rev) - - # TODO need to pass title, probably need a way to pass additional conf - - vega_dict = DefaultTemplate(repo.dvc_dir).fill(vega_data_dict) - divs.append(DivTemplate.prepare_div(vega_dict)) - PageTemplate.save(divs, "viz.html") + data = _load(repo.tree, target) + vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data) + divs.append(DivTemplate.prepare_div(vega_plot_json)) + PageTemplate.save(divs, plot_filename) diff --git a/setup.py b/setup.py index 3b57acc4cd..82d382bf04 100644 --- a/setup.py +++ b/setup.py @@ -130,6 +130,7 @@ def run(self): "mock-ssh-server>=0.6.0", "moto==1.3.14.dev464", "rangehttpserver==1.2.0", + "beautifulsoup4==4.4.0", ] if (sys.version_info) >= (3, 6): diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py new file mode 100644 index 0000000000..460d461a4d --- /dev/null +++ b/tests/func/test_plot.py @@ -0,0 +1,37 @@ +import json +from copy import copy + +from bs4 import BeautifulSoup +from funcy import first + +from dvc.plot import DefaultTemplate + + +def _add_revision(data, rev="current workspace"): + new_data = copy(data) + for e in new_data: + e["revision"] = rev + + return new_data + + +def test_plot_vega_compliant_json(tmp_dir, dvc): + metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] + tmp_dir.dvc_gen({"metric.json": json.dumps(metric)}) + dvc.metrics.add("metric.json") + + dvc.plot(["metric.json"], "result.html") + + page = tmp_dir / "result.html" + + assert page.exists() + page_content = BeautifulSoup(page.read_text()) + + with_revision = _add_revision(metric) + expected_script_content = json.dumps( + DefaultTemplate(dvc.dvc_dir).fill(with_revision), + indent=4, + separators=(",", ": "), + ) + + assert expected_script_content in first(page_content.body.script.contents) From 731b7858124a330ce9b6a80fb59a76b0375d95ac Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 14:45:08 +0200 Subject: [PATCH 05/15] plot makedirs for backward compatibility --- dvc/plot.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dvc/plot.py b/dvc/plot.py index 373840397f..e4ac03b494 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -21,6 +21,9 @@ def dump(self): makedirs(self.plot_templates_dir, exist_ok=True) + if not os.path.exists(self.plot_templates_dir): + makedirs(self.plot_templates_dir) + with open( os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "w+" ) as fd: @@ -66,5 +69,6 @@ def fill(self, data): def init_plot_templates(dvc_dir): + templates = [DefaultTemplate] [t(dvc_dir).dump() for t in templates] From c7edae1299c5b8004df95a4eeeae2d07b5838384 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 14:51:06 +0200 Subject: [PATCH 06/15] log path --- dvc/command/plot.py | 1 + dvc/repo/plot.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 32bfd4c941..4d0ca2b3dc 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -10,6 +10,7 @@ class CmdPlot(CmdBase): def run(self): self.repo.plot(self.args.targets) + return 0 def add_parser(subparsers, parent_parser): diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index dd444ba78b..87be89765e 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -53,10 +53,14 @@ def _load(tree, target, revision="current workspace"): return data -def plot(repo, targets, plot_filename="plot.html", typ="json"): +def plot(repo, targets, plot_path="plot.html", typ="json"): divs = [] for target in targets: data = _load(repo.tree, target) vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data) divs.append(DivTemplate.prepare_div(vega_plot_json)) - PageTemplate.save(divs, plot_filename) + PageTemplate.save(divs, plot_path) + logger.info( + "Your can see your plot by opening '{}' in your " + "browser!".format(plot_path) + ) From b447d5e5a2c8623fd1e10036a341ef35ba31a7f0 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 15:02:54 +0200 Subject: [PATCH 07/15] pretty plot link to visualization page --- dvc/repo/plot.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 87be89765e..080051886d 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,7 +1,9 @@ import json import logging +import os from dvc.plot import DefaultTemplate +from dvc.utils import format_link logger = logging.getLogger(__name__) @@ -61,6 +63,10 @@ def plot(repo, targets, plot_path="plot.html", typ="json"): divs.append(DivTemplate.prepare_div(vega_plot_json)) PageTemplate.save(divs, plot_path) logger.info( - "Your can see your plot by opening '{}' in your " - "browser!".format(plot_path) + "Your can see your plot by opening {} in your " + "browser!".format( + format_link( + "file://{}".format(os.path.join(repo.root_dir, plot_path)) + ) + ) ) From dc1a12033a2d5d6e55fb82f537ceb47e0c3f6287 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 15:28:35 +0200 Subject: [PATCH 08/15] make target default title --- dvc/plot.py | 6 +++--- dvc/repo/plot.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dvc/plot.py b/dvc/plot.py index e4ac03b494..c852b669bd 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -34,7 +34,7 @@ def dump(self): separators=self.SEPARATORS, ) - def fill(self, data): + def fill(self, data, data_src=""): raise NotImplementedError @@ -53,11 +53,11 @@ class DefaultTemplate(AbstractTemplate): }, } - def fill(self, data): + def fill(self, data, data_src=""): assert isinstance(data, list) assert all({"x", "y", "revision"} == set(d.keys()) for d in data) - update_dict = {"data": {"values": data}} + update_dict = {"data": {"values": data}, "title": data_src} with open( os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 080051886d..df4713c1a7 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -59,7 +59,7 @@ def plot(repo, targets, plot_path="plot.html", typ="json"): divs = [] for target in targets: data = _load(repo.tree, target) - vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data) + vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data, target) divs.append(DivTemplate.prepare_div(vega_plot_json)) PageTemplate.save(divs, plot_path) logger.info( From b51c9a1f2ba92d4e97d3ae1046c919d7e55d8ac6 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 15:51:59 +0200 Subject: [PATCH 09/15] efiop review --- dvc/repo/plot.py | 67 +++++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index df4713c1a7..7877e7b05e 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -3,13 +3,12 @@ import os from dvc.plot import DefaultTemplate +from dvc.repo import locked from dvc.utils import format_link logger = logging.getLogger(__name__) - -class PageTemplate: - HTML = """ +PAGE_HTML = """ dvc plot @@ -21,47 +20,51 @@ class PageTemplate: """ - @staticmethod - def save(divs, path): - page = PageTemplate.HTML.format(divs="\n".join(divs)) - with open(path, "w") as fd: - fd.write(page) - - -class DivTemplate: - HTML = """ -
- """ - - @staticmethod - def prepare_div(vega_dict): - from shortuuid import uuid - - id = uuid() - return DivTemplate.HTML.format( - id=str(id), - vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), - ) +DIV_HTML = """
+""" + + +def _save_plot_html(divs, path): + page = PAGE_HTML.format(divs="\n".join(divs)) + with open(path, "w") as fobj: + fobj.write(page) + + +def _prepare_div(vega_dict): + from shortuuid import uuid + + id = uuid() + return DIV_HTML.format( + id=str(id), + vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), + ) def _load(tree, target, revision="current workspace"): - with tree.open(target, "r") as fd: - data = json.load(fd) + with tree.open(target, "r") as fobj: + data = json.load(fobj) for d in data: d["revision"] = revision return data -def plot(repo, targets, plot_path="plot.html", typ="json"): +@locked +def plot(repo, targets, plot_path=None, typ="json"): + + if not plot_path: + plot_path = "plot.html" + divs = [] for target in targets: data = _load(repo.tree, target) vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data, target) - divs.append(DivTemplate.prepare_div(vega_plot_json)) - PageTemplate.save(divs, plot_path) + divs.append(_prepare_div(vega_plot_json)) + + _save_plot_html(divs, plot_path) + logger.info( "Your can see your plot by opening {} in your " "browser!".format( From 5ae252702e8e733ce73a0009de2c5bf4af080120 Mon Sep 17 00:00:00 2001 From: pawel Date: Wed, 1 Apr 2020 15:53:49 +0200 Subject: [PATCH 10/15] efiop review --- tests/func/test_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 460d461a4d..bc9b9378e2 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -29,7 +29,7 @@ def test_plot_vega_compliant_json(tmp_dir, dvc): with_revision = _add_revision(metric) expected_script_content = json.dumps( - DefaultTemplate(dvc.dvc_dir).fill(with_revision), + DefaultTemplate(dvc.dvc_dir).fill(with_revision, "metric.json"), indent=4, separators=(",", ": "), ) From 12093aa68c13c42400d3eccde84dc00686370d9a Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 03:24:37 +0200 Subject: [PATCH 11/15] proper id generation --- dvc/repo/plot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 7877e7b05e..73f4e8184f 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,6 +1,8 @@ import json import logging import os +import random +import string from dvc.plot import DefaultTemplate from dvc.repo import locked @@ -34,9 +36,8 @@ def _save_plot_html(divs, path): def _prepare_div(vega_dict): - from shortuuid import uuid - id = uuid() + id = random.sample(string.ascii_lowercase, 8) return DIV_HTML.format( id=str(id), vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), From 258956c70ec9905136aec4a1e9de613d46ad9d66 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 03:28:46 +0200 Subject: [PATCH 12/15] proper id generation --- dvc/repo/plot.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 73f4e8184f..c6a93131c1 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -36,8 +36,7 @@ def _save_plot_html(divs, path): def _prepare_div(vega_dict): - - id = random.sample(string.ascii_lowercase, 8) + id = "".join(random.sample(string.ascii_lowercase, 8)) return DIV_HTML.format( id=str(id), vega_json=json.dumps(vega_dict, indent=4, separators=(",", ": ")), From 5369aebdb2d55d2650b4d62b8e899a9291082be8 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 16:10:42 +0200 Subject: [PATCH 13/15] add confusion matrix template --- dvc/command/plot.py | 12 ++++++- dvc/plot.py | 75 +++++++++++++++++++++++++++++------------ dvc/repo/__init__.py | 3 ++ dvc/repo/init.py | 5 +-- dvc/repo/plot.py | 11 ++++-- tests/func/test_plot.py | 43 +++++++++++++++++++++-- 6 files changed, 116 insertions(+), 33 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 4d0ca2b3dc..43ca031333 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -9,7 +9,11 @@ class CmdPlot(CmdBase): def run(self): - self.repo.plot(self.args.targets) + self.repo.plot( + self.args.targets, + plot_path=self.args.path, + template=self.args.template, + ) return 0 @@ -25,6 +29,12 @@ def add_parser(subparsers, parent_parser): help=PLOT_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) + plot_parser.add_argument( + "--template", nargs="?", help="Template file to choose." + ) + plot_parser.add_argument( + "--path", nargs="?", help="Path to write plot HTML to." + ) plot_parser.add_argument( "targets", nargs="+", help="Metric files to visualize." ) diff --git a/dvc/plot.py b/dvc/plot.py index c852b669bd..270cfa7ab2 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -1,20 +1,21 @@ import json import logging import os + +from funcy import cached_property + from dvc.utils.fs import makedirs logger = logging.getLogger(__name__) -class AbstractTemplate: - TEMPLATES_DIR = "plot" +class Template: INDENT = 4 SEPARATORS = (",", ": ") - def __init__(self, dvc_dir): - self.dvc_dir = dvc_dir - self.plot_templates_dir = os.path.join(dvc_dir, self.TEMPLATES_DIR) + def __init__(self, templates_dir): + self.plot_templates_dir = templates_dir def dump(self): import json @@ -34,16 +35,25 @@ def dump(self): separators=self.SEPARATORS, ) - def fill(self, data, data_src=""): - raise NotImplementedError + @staticmethod + def fill(template_path, data, data_src=""): + assert isinstance(data, list) + assert all({"x", "y", "revision"} == set(d.keys()) for d in data) + + update_dict = {"data": {"values": data}, "title": data_src} + + with open(template_path, "r") as fd: + vega_spec = json.load(fd) + + vega_spec.update(update_dict) + return vega_spec -class DefaultTemplate(AbstractTemplate): +class DefaultLinearTemplate(Template): TEMPLATE_NAME = "default.json" DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", - "title": "", "data": {"values": []}, "mark": {"type": "line"}, "encoding": { @@ -53,22 +63,43 @@ class DefaultTemplate(AbstractTemplate): }, } - def fill(self, data, data_src=""): - assert isinstance(data, list) - assert all({"x", "y", "revision"} == set(d.keys()) for d in data) - update_dict = {"data": {"values": data}, "title": data_src} +class DefaultConfusionTemplate(Template): + TEMPLATE_NAME = "default_confusion.json" + DEFAULT_CONTENT = { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "data": {"values": []}, + "mark": "rect", + "encoding": { + "x": { + "field": "x", + "type": "nominal", + "sort": "ascending", + "title": "Predicted value", + }, + "y": { + "field": "y", + "type": "nominal", + "sort": "ascending", + "title": "Actual value", + }, + "color": {"aggregate": "count", "type": "quantitative"}, + }, + } - with open( - os.path.join(self.plot_templates_dir, self.TEMPLATE_NAME), "r" - ) as fd: - vega_spec = json.load(fd) - vega_spec.update(update_dict) - return vega_spec +class PlotTemplates: + TEMPLATES_DIR = "plot" + TEMPLATES = [DefaultLinearTemplate, DefaultConfusionTemplate] + @cached_property + def templates_dir(self): + return os.path.join(self.dvc_dir, self.TEMPLATES_DIR) -def init_plot_templates(dvc_dir): + def __init__(self, dvc_dir): + self.dvc_dir = dvc_dir - templates = [DefaultTemplate] - [t(dvc_dir).dump() for t in templates] + if not os.path.exists(self.templates_dir): + makedirs(self.templates_dir, exist_ok=True) + for t in self.TEMPLATES: + t(self.templates_dir).dump() diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index 9da9fdae7f..30395fbc70 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -15,6 +15,7 @@ OutputNotFoundError, ) from dvc.path_info import PathInfo +from dvc.plot import PlotTemplates from dvc.remote.base import RemoteActionNotImplemented from dvc.utils.fs import path_isin from .graph import check_acyclic, get_pipeline, get_pipelines @@ -107,6 +108,8 @@ def __init__(self, root_dir=None): self._ignore() + self.plot_templates = PlotTemplates(self.dvc_dir) + @property def tree(self): return self._tree diff --git a/dvc/repo/init.py b/dvc/repo/init.py index 71f9f0d634..dda339bbd7 100644 --- a/dvc/repo/init.py +++ b/dvc/repo/init.py @@ -6,7 +6,6 @@ from dvc import analytics from dvc.config import Config from dvc.exceptions import InitError, InvalidArgumentError -from dvc.plot import init_plot_templates from dvc.repo import Repo from dvc.scm import SCM from dvc.scm.base import SCMError @@ -101,11 +100,9 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False): with config.edit() as conf: conf["core"]["no_scm"] = True - init_plot_templates(dvc_dir) - proj = Repo(root_dir) - scm.add([config.files["repo"]]) + scm.add([config.files["repo"], proj.plot_templates.templates_dir]) if scm.ignore_file: scm.add([os.path.join(dvc_dir, scm.ignore_file)]) diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index c6a93131c1..0a39fbd2b3 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -4,7 +4,7 @@ import random import string -from dvc.plot import DefaultTemplate +from dvc.plot import Template from dvc.repo import locked from dvc.utils import format_link @@ -52,15 +52,20 @@ def _load(tree, target, revision="current workspace"): @locked -def plot(repo, targets, plot_path=None, typ="json"): +def plot(repo, targets, plot_path=None, template=None, typ="json"): if not plot_path: plot_path = "plot.html" + if not template: + template = os.path.join( + repo.plot_templates.templates_dir, "default.json" + ) + divs = [] for target in targets: data = _load(repo.tree, target) - vega_plot_json = DefaultTemplate(repo.dvc_dir).fill(data, target) + vega_plot_json = Template.fill(template, data, target) divs.append(_prepare_div(vega_plot_json)) _save_plot_html(divs, plot_path) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index bc9b9378e2..52d222fad2 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,10 +1,11 @@ import json +import os from copy import copy from bs4 import BeautifulSoup from funcy import first -from dvc.plot import DefaultTemplate +from dvc.plot import Template def _add_revision(data, rev="current workspace"): @@ -15,7 +16,7 @@ def _add_revision(data, rev="current workspace"): return new_data -def test_plot_vega_compliant_json(tmp_dir, dvc): +def test_plot_linear(tmp_dir, dvc): metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] tmp_dir.dvc_gen({"metric.json": json.dumps(metric)}) dvc.metrics.add("metric.json") @@ -29,7 +30,43 @@ def test_plot_vega_compliant_json(tmp_dir, dvc): with_revision = _add_revision(metric) expected_script_content = json.dumps( - DefaultTemplate(dvc.dvc_dir).fill(with_revision, "metric.json"), + Template.fill( + os.path.join(dvc.plot_templates.templates_dir, "default.json"), + with_revision, + "metric.json", + ), + indent=4, + separators=(",", ": "), + ) + + assert expected_script_content in first(page_content.body.script.contents) + + +def test_plot_confusion(tmp_dir, dvc): + confusion_matrix = [{"x": "B", "y": "A"}, {"x": "A", "y": "A"}] + tmp_dir.dvc_gen({"metric.json": json.dumps(confusion_matrix)}) + dvc.metrics.add("metric.json") + + dvc.plot( + ["metric.json"], + "result.html", + template=os.path.join( + dvc.plot_templates.templates_dir, "default_confusion.json" + ), + ) + + page = tmp_dir / "result.html" + + assert page.exists() + page_content = BeautifulSoup(page.read_text()) + + with_revision = _add_revision(confusion_matrix) + expected_script_content = json.dumps( + Template.fill( + os.path.join(".dvc", "plot", "default_confusion.json"), + with_revision, + "metric.json", + ), indent=4, separators=(",", ": "), ) From eda58746c6771d194f3d0684532a89f28f3de838 Mon Sep 17 00:00:00 2001 From: pawel Date: Thu, 2 Apr 2020 17:12:57 +0200 Subject: [PATCH 14/15] refactor tests --- tests/func/test_plot.py | 94 ++++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index 52d222fad2..b1ca0761fa 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,74 +1,98 @@ import json import os -from copy import copy from bs4 import BeautifulSoup from funcy import first -from dvc.plot import Template +def _run_with_metric(tmp_dir, dvc, metric, metric_filename, commit=None): + tmp_dir.gen({metric_filename: json.dumps(metric)}) + dvc.run(metrics_no_cache=[metric_filename]) + if hasattr(dvc, "scm"): + dvc.scm.add([metric_filename, metric_filename + ".dvc"]) + if commit: + dvc.scm.commit(commit) -def _add_revision(data, rev="current workspace"): - new_data = copy(data) - for e in new_data: - e["revision"] = rev - return new_data - - -def test_plot_linear(tmp_dir, dvc): +def test_plot_linear(tmp_dir, scm, dvc): metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] - tmp_dir.dvc_gen({"metric.json": json.dumps(metric)}) - dvc.metrics.add("metric.json") + _run_with_metric(tmp_dir, dvc, metric, "metric.json", "first run") dvc.plot(["metric.json"], "result.html") page = tmp_dir / "result.html" - assert page.exists() page_content = BeautifulSoup(page.read_text()) - with_revision = _add_revision(metric) - expected_script_content = json.dumps( - Template.fill( - os.path.join(dvc.plot_templates.templates_dir, "default.json"), - with_revision, - "metric.json", - ), + expected_vega_json = json.dumps( + { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "data": { + "values": [ + {"x": 1, "y": 2, "revision": "current workspace"}, + {"x": 2, "y": 3, "revision": "current workspace"}, + ] + }, + "mark": {"type": "line"}, + "encoding": { + "x": {"field": "x", "type": "quantitative"}, + "y": {"field": "y", "type": "quantitative"}, + "color": {"field": "revision", "type": "nominal"}, + }, + "title": "metric.json", + }, indent=4, separators=(",", ": "), ) - assert expected_script_content in first(page_content.body.script.contents) + assert expected_vega_json in first(page_content.body.script.contents) def test_plot_confusion(tmp_dir, dvc): confusion_matrix = [{"x": "B", "y": "A"}, {"x": "A", "y": "A"}] - tmp_dir.dvc_gen({"metric.json": json.dumps(confusion_matrix)}) - dvc.metrics.add("metric.json") + _run_with_metric( + tmp_dir, dvc, confusion_matrix, "metric.json", "first run" + ) dvc.plot( ["metric.json"], "result.html", - template=os.path.join( - dvc.plot_templates.templates_dir, "default_confusion.json" - ), + os.path.join(".dvc", "plot", "default_confusion.json"), ) page = tmp_dir / "result.html" - assert page.exists() page_content = BeautifulSoup(page.read_text()) - with_revision = _add_revision(confusion_matrix) - expected_script_content = json.dumps( - Template.fill( - os.path.join(".dvc", "plot", "default_confusion.json"), - with_revision, - "metric.json", - ), + expected_vega_json = json.dumps( + { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "data": { + "values": [ + {"x": "B", "y": "A", "revision": "current workspace"}, + {"x": "A", "y": "A", "revision": "current workspace"}, + ] + }, + "mark": "rect", + "encoding": { + "x": { + "field": "x", + "type": "nominal", + "sort": "ascending", + "title": "Predicted value", + }, + "y": { + "field": "y", + "type": "nominal", + "sort": "ascending", + "title": "Actual value", + }, + "color": {"aggregate": "count", "type": "quantitative"}, + }, + "title": "metric.json", + }, indent=4, separators=(",", ": "), ) - assert expected_script_content in first(page_content.body.script.contents) + assert expected_vega_json in first(page_content.body.script.contents) From 2701550629f09c5d009f66ae1f3615e94207fb85 Mon Sep 17 00:00:00 2001 From: pawel Date: Fri, 3 Apr 2020 15:04:23 +0200 Subject: [PATCH 15/15] plot from dvct file --- dvc/command/plot.py | 18 ++++++----- dvc/plot.py | 23 ++++++++++---- dvc/repo/plot.py | 68 ++++++++++++++++++++++++++--------------- tests/func/test_plot.py | 41 +++++++++++-------------- 4 files changed, 89 insertions(+), 61 deletions(-) diff --git a/dvc/command/plot.py b/dvc/command/plot.py index 43ca031333..d1de4a340c 100644 --- a/dvc/command/plot.py +++ b/dvc/command/plot.py @@ -1,5 +1,6 @@ import argparse import logging +import os from dvc.command.base import append_doc_link, CmdBase from dvc.utils import format_link @@ -9,10 +10,14 @@ class CmdPlot(CmdBase): def run(self): - self.repo.plot( - self.args.targets, - plot_path=self.args.path, - template=self.args.template, + path = self.repo.plot(self.args.target, template=self.args.template,) + logger.info( + "Your can see your plot by opening {} in your " + "browser!".format( + format_link( + "file://{}".format(os.path.join(self.repo.root_dir, path)) + ) + ) ) return 0 @@ -33,9 +38,6 @@ def add_parser(subparsers, parent_parser): "--template", nargs="?", help="Template file to choose." ) plot_parser.add_argument( - "--path", nargs="?", help="Path to write plot HTML to." - ) - plot_parser.add_argument( - "targets", nargs="+", help="Metric files to visualize." + "target", nargs="?", help="Metric files to visualize." ) plot_parser.set_defaults(func=CmdPlot) diff --git a/dvc/plot.py b/dvc/plot.py index 270cfa7ab2..f6103d2492 100644 --- a/dvc/plot.py +++ b/dvc/plot.py @@ -4,6 +4,7 @@ from funcy import cached_property +from dvc.exceptions import DvcException from dvc.utils.fs import makedirs @@ -35,16 +36,26 @@ def dump(self): separators=self.SEPARATORS, ) - @staticmethod - def fill(template_path, data, data_src=""): + def load_template(self, path): + try: + with open(path, "r") as fd: + return json.load(fd) + except FileNotFoundError: + try: + with open( + os.path.join(self.plot_templates_dir, path), "r" + ) as fd: + return json.load(fd) + except FileNotFoundError: + raise DvcException("Not in repo nor in defaults") + + def fill(self, template_path, data, data_src=""): assert isinstance(data, list) assert all({"x", "y", "revision"} == set(d.keys()) for d in data) update_dict = {"data": {"values": data}, "title": data_src} - with open(template_path, "r") as fd: - vega_spec = json.load(fd) - + vega_spec = self.load_template(template_path) vega_spec.update(update_dict) return vega_spec @@ -65,7 +76,7 @@ class DefaultLinearTemplate(Template): class DefaultConfusionTemplate(Template): - TEMPLATE_NAME = "default_confusion.json" + TEMPLATE_NAME = "cf.json" DEFAULT_CONTENT = { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": {"values": []}, diff --git a/dvc/repo/plot.py b/dvc/repo/plot.py index 0a39fbd2b3..4f9fa6e38d 100644 --- a/dvc/repo/plot.py +++ b/dvc/repo/plot.py @@ -1,12 +1,12 @@ import json import logging -import os import random +import re import string +from dvc.exceptions import DvcException from dvc.plot import Template from dvc.repo import locked -from dvc.utils import format_link logger = logging.getLogger(__name__) @@ -43,7 +43,7 @@ def _prepare_div(vega_dict): ) -def _load(tree, target, revision="current workspace"): +def _load_data(tree, target, revision="current workspace"): with tree.open(target, "r") as fobj: data = json.load(fobj) for d in data: @@ -51,30 +51,50 @@ def _load(tree, target, revision="current workspace"): return data -@locked -def plot(repo, targets, plot_path=None, template=None, typ="json"): +def _parse_plots(path): + with open(path, "r") as fobj: + content = fobj.read() - if not plot_path: - plot_path = "plot.html" + plot_regex = re.compile("") - if not template: - template = os.path.join( - repo.plot_templates.templates_dir, "default.json" - ) + plots = list(plot_regex.findall(content)) + return False, plots - divs = [] - for target in targets: - data = _load(repo.tree, target) - vega_plot_json = Template.fill(template, data, target) - divs.append(_prepare_div(vega_plot_json)) - _save_plot_html(divs, plot_path) +def _parse_plot_str(plot_str): + content = plot_str.replace("<", "") + content = content.replace(">", "") + args = content.split("::")[1:] + if len(args) == 2: + return args + elif len(args) == 1: + return args[0], "default.json" + raise DvcException("Error parsing") - logger.info( - "Your can see your plot by opening {} in your " - "browser!".format( - format_link( - "file://{}".format(os.path.join(repo.root_dir, plot_path)) - ) - ) + +def to_div(repo, plot_str): + datafile, templatefile = _parse_plot_str(plot_str) + + data = _load_data(repo.tree, datafile) + vega_plot_json = Template(repo.plot_templates.templates_dir).fill( + templatefile, data, datafile ) + return _prepare_div(vega_plot_json) + + +@locked +def plot(repo, template_file, revisions=None): + if revisions is None: + revisions = [] + + is_html, plot_strings = _parse_plots(template_file) + m = {plot_str: to_div(repo, plot_str) for plot_str in plot_strings} + + result = template_file.replace(".dvct", ".html") + if not is_html: + _save_plot_html( + [m[p] for p in plot_strings], result, + ) + return result + else: + raise NotImplementedError diff --git a/tests/func/test_plot.py b/tests/func/test_plot.py index b1ca0761fa..8ab02ecac8 100644 --- a/tests/func/test_plot.py +++ b/tests/func/test_plot.py @@ -1,5 +1,4 @@ import json -import os from bs4 import BeautifulSoup from funcy import first @@ -14,17 +13,22 @@ def _run_with_metric(tmp_dir, dvc, metric, metric_filename, commit=None): dvc.scm.commit(commit) -def test_plot_linear(tmp_dir, scm, dvc): +# TODO +def test_plot_in_html_file(tmp_dir): + pass + + +def test_plot_in_no_html(tmp_dir, scm, dvc): metric = [{"x": 1, "y": 2}, {"x": 2, "y": 3}] _run_with_metric(tmp_dir, dvc, metric, "metric.json", "first run") - dvc.plot(["metric.json"], "result.html") + template_content = "" + (tmp_dir / "template.dvct").write_text(template_content) - page = tmp_dir / "result.html" - assert page.exists() - page_content = BeautifulSoup(page.read_text()) + result = dvc.plot("template.dvct") - expected_vega_json = json.dumps( + page_content = BeautifulSoup((tmp_dir / result).read_text()) + assert json.dumps( { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": { @@ -43,9 +47,7 @@ def test_plot_linear(tmp_dir, scm, dvc): }, indent=4, separators=(",", ": "), - ) - - assert expected_vega_json in first(page_content.body.script.contents) + ) in first(page_content.body.script.contents) def test_plot_confusion(tmp_dir, dvc): @@ -53,18 +55,13 @@ def test_plot_confusion(tmp_dir, dvc): _run_with_metric( tmp_dir, dvc, confusion_matrix, "metric.json", "first run" ) + template_content = "" + (tmp_dir / "template.dvct").write_text(template_content) - dvc.plot( - ["metric.json"], - "result.html", - os.path.join(".dvc", "plot", "default_confusion.json"), - ) - - page = tmp_dir / "result.html" - assert page.exists() - page_content = BeautifulSoup(page.read_text()) + result = dvc.plot("template.dvct") - expected_vega_json = json.dumps( + page_content = BeautifulSoup((tmp_dir / result).read_text()) + assert json.dumps( { "$schema": "https://vega.github.io/schema/vega-lite/v4.json", "data": { @@ -93,6 +90,4 @@ def test_plot_confusion(tmp_dir, dvc): }, indent=4, separators=(",", ": "), - ) - - assert expected_vega_json in first(page_content.body.script.contents) + ) in first(page_content.body.script.contents)