From 60e8560f3bd3c4ec77127f3390cbc8f1f63e456d Mon Sep 17 00:00:00 2001 From: Nissan Pow Date: Fri, 10 Jan 2025 23:03:32 -0800 Subject: [PATCH] add support for diff-runs. also added tests --- .gitignore | 4 + README.md | 4 + metaflow_diff/metaflow_diff.py | 142 +++++++++++++++++++++------------ tests/test_metaflow_diff.py | 82 +++++++++++++++++++ 4 files changed, 183 insertions(+), 49 deletions(-) create mode 100644 .gitignore create mode 100644 tests/test_metaflow_diff.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..db03e50 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.idea +__pycache__ +*.egg-info +*.pyc diff --git a/README.md b/README.md index 74cb6d8..5f91dfe 100644 --- a/README.md +++ b/README.md @@ -34,4 +34,8 @@ metaflow-diff patch --file my.patch HelloFlow/5 Produce a patch file that, if applied, changes the code in the current working directory to match that of the run. +``` +metaflow-diff diff-runs HelloFlow/5 HelloFlow/6 +``` +Show diff between the two given runs. diff --git a/metaflow_diff/metaflow_diff.py b/metaflow_diff/metaflow_diff.py index a209a31..eea1b86 100755 --- a/metaflow_diff/metaflow_diff.py +++ b/metaflow_diff/metaflow_diff.py @@ -1,15 +1,13 @@ #!/usr/bin/env python -import sys import os import shutil +import sys +from subprocess import PIPE, run from tempfile import TemporaryDirectory -from subprocess import call, run, PIPE -import os -from metaflow import namespace, Run -from metaflow.cli import echo_always import click - +from metaflow import Run, namespace +from metaflow.cli import echo_always EXCLUSIONS = [ "metaflow/", @@ -20,34 +18,93 @@ ] -def git_diff(tmpdir, output=False): - for dirpath, dirnames, filenames in os.walk(tmpdir): +def echo(line): + echo_always(line, err=True, fg="magenta") + + +def extract_code_package(runspec, exclusions): + try: + namespace(None) + run = Run(runspec) + echo(f"✅ Run *{runspec}* found, downloading code..") + except: + echo(f"❌ Run **{runspec}** not found") + sys.exit(1) + + if run.code is None: + echo( + f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?" + ) + sys.exit(1) + + tar = run.code.tarball + members = [ + m for m in tar.getmembers() if not any(m.name.startswith(x) for x in exclusions) + ] + + tmp = TemporaryDirectory() + tar.extractall(tmp.name, members) + return tmp + + +def perform_diff(source_dir, target_dir, output=False): + diffs = [] + for dirpath, dirnames, filenames in os.walk(source_dir): for fname in filenames: - rel = os.path.relpath(dirpath, tmpdir) - gitpath = os.path.join(rel, fname) - if os.path.exists(gitpath): + source_file = os.path.join(dirpath, fname) + rel_path = os.path.relpath(source_file, source_dir) + target_file = os.path.join(target_dir, rel_path) + + if os.path.exists(target_file): cmd = [ "git", "diff", "--no-index", - gitpath, - os.path.join(dirpath, fname), + "--exit-code", + "--color", + source_file, + target_file, ] + result = run(cmd, text=True, stdout=PIPE, stderr=PIPE) + if result.returncode == 0: + echo(f"✅ {rel_path} is identical, skipping") + continue + if output: - yield run(cmd, text=True, stdout=PIPE).stdout + diffs.append(result.stdout) else: - run(cmd) + run(["less", "-R"], input=result.stdout, text=True) else: - echo(f"❗ {gitpath} not in the Git repo, skipping") + echo(f"❗ {rel_path} not in the target directory, skipping") + return diffs if output else None -def echo(line): - echo_always(line, err=True, fg="magenta") +def run_op(runspec, op, op_args): + tmp = None + try: + tmp = extract_code_package(runspec, EXCLUSIONS) + op(tmp.name, **op_args) + finally: + if tmp and os.path.exists(tmp.name): + shutil.rmtree(tmp.name) + + +def run_op_diff_runs(source_run, target_run): + source_tmp = None + target_tmp = None + try: + source_tmp = extract_code_package(source_run, EXCLUSIONS) + target_tmp = extract_code_package(target_run, EXCLUSIONS) + perform_diff(source_tmp.name, target_tmp.name) + finally: + if source_tmp and os.path.exists(source_tmp.name): + shutil.rmtree(source_tmp.name) + if target_tmp and os.path.exists(target_tmp.name): + shutil.rmtree(target_tmp.name) def op_diff(tmpdir): - for _ in git_diff(tmpdir): - pass + perform_diff(tmpdir, os.getcwd()) def op_pull(tmpdir, dst=None): @@ -59,8 +116,9 @@ def op_pull(tmpdir, dst=None): def op_patch(tmpdir, dst=None): + diffs = perform_diff(tmpdir, os.getcwd(), output=True) with open(dst, "w") as f: - for out in git_diff(tmpdir, output=True): + for out in diffs: out = out.replace(tmpdir, "/.") out = out.replace("+++ b/./", "+++ b/") out = out.replace("--- b/./", "--- b/") @@ -81,34 +139,6 @@ def op_patch(tmpdir, dst=None): ) -def run_op(runspec, op, op_args): - try: - namespace(None) - run = Run(runspec) - echo(f"✅ Run *{runspec}* found, downloading code..") - except: - echo(f"❌ Run **{runspec}** not found") - sys.exit(1) - if run.code is None: - echo( - f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?" - ) - sys.exit(1) - tar = run.code.tarball - members = [] - for m in tar.getmembers(): - if not any(m.name.startswith(x) for x in EXCLUSIONS): - members.append(m) - tmp = None - try: - tmp = TemporaryDirectory() - tar.extractall(tmp.name, members) - op(tmp.name, **op_args) - finally: - if tmp and os.path.exists(tmp.name): - shutil.rmtree(tmp.name) - - @click.group() def cli(): pass @@ -124,6 +154,16 @@ def diff(metaflow_run=None): run_op(metaflow_run, op_diff, {}) +@cli.command() +@click.argument("source_run") +@click.argument("target_run") +def diff_runs(source_run, target_run): + """ + Show a 'git diff' between two Metaflow runs, e.g. HelloFlow/3 (source) and HelloFlow/4 (target) + """ + run_op_diff_runs(source_run, target_run) + + @cli.command() @click.argument("metaflow_run") @click.option( @@ -150,3 +190,7 @@ def patch(metaflow_run, file=None): if file is None: file = metaflow_run.lower().replace("/", "_") + ".patch" run_op(metaflow_run, op_patch, {"dst": file}) + + +if __name__ == "__main__": + cli() diff --git a/tests/test_metaflow_diff.py b/tests/test_metaflow_diff.py new file mode 100644 index 0000000..e6c3d3f --- /dev/null +++ b/tests/test_metaflow_diff.py @@ -0,0 +1,82 @@ +import os +import tempfile +import unittest +from subprocess import PIPE +from unittest.mock import MagicMock, patch + +from metaflow_diff.metaflow_diff import ( + EXCLUSIONS, + extract_code_package, + op_diff, + op_patch, + op_pull, + perform_diff, + run_op, +) + + +class TestMetaflowDiff(unittest.TestCase): + + @patch("metaflow_diff.metaflow_diff.Run") + @patch("metaflow_diff.metaflow_diff.namespace") + def test_extract_code_package(self, mock_namespace, mock_run): + mock_run.return_value.code.tarball.getmembers.return_value = [] + mock_run.return_value.code.tarball.extractall = MagicMock() + runspec = "HelloFlow/3" + + with patch( + "tempfile.TemporaryDirectory", return_value=tempfile.TemporaryDirectory() + ) as mock_tmp: + tmp = extract_code_package(runspec, EXCLUSIONS) + + mock_namespace.assert_called_once_with(None) + mock_run.assert_called_once_with(runspec) + self.assertTrue(os.path.exists(tmp.name)) + + @patch("metaflow_diff.metaflow_diff.run") + def test_perform_diff(self, mock_run): + with tempfile.TemporaryDirectory() as source_dir, tempfile.TemporaryDirectory() as target_dir: + source_file = os.path.join(source_dir, "file.txt") + target_file = os.path.join(target_dir, "file.txt") + with open(source_file, "w") as f: + f.write("source content") + with open(target_file, "w") as f: + f.write("target content") + + perform_diff(source_dir, target_dir) + + mock_run.assert_called_once_with( + ["git", "diff", "--no-index", source_file, target_file], + ) + + @patch("shutil.rmtree") + @patch("metaflow_diff.metaflow_diff.extract_code_package") + @patch("metaflow_diff.metaflow_diff.op_diff") + def test_run_op(self, mock_op_diff, mock_extract_code_package, mock_rmtree): + mock_tmp = tempfile.TemporaryDirectory() + mock_extract_code_package.return_value = mock_tmp + runspec = "HelloFlow/3" + + run_op(runspec, mock_op_diff, {}) + + mock_extract_code_package.assert_called_once_with(runspec, EXCLUSIONS) + mock_op_diff.assert_called_once_with(mock_tmp.name) + mock_rmtree.assert_called_once_with(mock_tmp.name) + + @patch("metaflow_diff.metaflow_diff.perform_diff") + def test_op_patch(self, mock_perform_diff): + mock_perform_diff.return_value = ["diff --git a/file.txt b/file.txt\n"] + + with tempfile.TemporaryDirectory() as tmpdir: + patch_file = os.path.join(tmpdir, "patch.patch") + + op_patch(tmpdir, patch_file) + + mock_perform_diff.assert_called_once_with(tmpdir, os.getcwd(), output=True) + with open(patch_file, "r") as f: + content = f.read() + self.assertIn("diff --git a/file.txt b/file.txt\n", content) + + +if __name__ == "__main__": + unittest.main()