Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.idea
__pycache__
*.egg-info
*.pyc
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,8 @@ metaflow-diff patch --file my.patch HelloFlow/5
Produce a patch file that, if applied, changes the code in the current
working directory to match that of the run.

```
metaflow-diff diff-runs HelloFlow/5 HelloFlow/6
```

Show diff between the two given runs.
142 changes: 93 additions & 49 deletions metaflow_diff/metaflow_diff.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
#!/usr/bin/env python
import sys
import os
import shutil
import sys
from subprocess import PIPE, run
from tempfile import TemporaryDirectory
from subprocess import call, run, PIPE
import os

from metaflow import namespace, Run
from metaflow.cli import echo_always
import click

from metaflow import Run, namespace
from metaflow.cli import echo_always

EXCLUSIONS = [
"metaflow/",
Expand All @@ -20,34 +18,93 @@
]


def git_diff(tmpdir, output=False):
for dirpath, dirnames, filenames in os.walk(tmpdir):
def echo(line):
echo_always(line, err=True, fg="magenta")


def extract_code_package(runspec, exclusions):
try:
namespace(None)
run = Run(runspec)
echo(f"✅ Run *{runspec}* found, downloading code..")
except:
echo(f"❌ Run **{runspec}** not found")
sys.exit(1)

if run.code is None:
echo(
f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?"
)
sys.exit(1)

tar = run.code.tarball
members = [
m for m in tar.getmembers() if not any(m.name.startswith(x) for x in exclusions)
]

tmp = TemporaryDirectory()
tar.extractall(tmp.name, members)
return tmp


def perform_diff(source_dir, target_dir, output=False):
diffs = []
for dirpath, dirnames, filenames in os.walk(source_dir):
for fname in filenames:
rel = os.path.relpath(dirpath, tmpdir)
gitpath = os.path.join(rel, fname)
if os.path.exists(gitpath):
source_file = os.path.join(dirpath, fname)
rel_path = os.path.relpath(source_file, source_dir)
target_file = os.path.join(target_dir, rel_path)

if os.path.exists(target_file):
cmd = [
"git",
"diff",
"--no-index",
gitpath,
os.path.join(dirpath, fname),
"--exit-code",
"--color",
source_file,
target_file,
]
result = run(cmd, text=True, stdout=PIPE, stderr=PIPE)
if result.returncode == 0:
echo(f"✅ {rel_path} is identical, skipping")
continue

if output:
yield run(cmd, text=True, stdout=PIPE).stdout
diffs.append(result.stdout)
else:
run(cmd)
run(["less", "-R"], input=result.stdout, text=True)
else:
echo(f"❗ {gitpath} not in the Git repo, skipping")
echo(f"❗ {rel_path} not in the target directory, skipping")
return diffs if output else None


def echo(line):
echo_always(line, err=True, fg="magenta")
def run_op(runspec, op, op_args):
tmp = None
try:
tmp = extract_code_package(runspec, EXCLUSIONS)
op(tmp.name, **op_args)
finally:
if tmp and os.path.exists(tmp.name):
shutil.rmtree(tmp.name)


def run_op_diff_runs(source_run, target_run):
source_tmp = None
target_tmp = None
try:
source_tmp = extract_code_package(source_run, EXCLUSIONS)
target_tmp = extract_code_package(target_run, EXCLUSIONS)
perform_diff(source_tmp.name, target_tmp.name)
finally:
if source_tmp and os.path.exists(source_tmp.name):
shutil.rmtree(source_tmp.name)
if target_tmp and os.path.exists(target_tmp.name):
shutil.rmtree(target_tmp.name)


def op_diff(tmpdir):
for _ in git_diff(tmpdir):
pass
perform_diff(tmpdir, os.getcwd())


def op_pull(tmpdir, dst=None):
Expand All @@ -59,8 +116,9 @@ def op_pull(tmpdir, dst=None):


def op_patch(tmpdir, dst=None):
diffs = perform_diff(tmpdir, os.getcwd(), output=True)
with open(dst, "w") as f:
for out in git_diff(tmpdir, output=True):
for out in diffs:
out = out.replace(tmpdir, "/.")
out = out.replace("+++ b/./", "+++ b/")
out = out.replace("--- b/./", "--- b/")
Expand All @@ -81,34 +139,6 @@ def op_patch(tmpdir, dst=None):
)


def run_op(runspec, op, op_args):
try:
namespace(None)
run = Run(runspec)
echo(f"✅ Run *{runspec}* found, downloading code..")
except:
echo(f"❌ Run **{runspec}** not found")
sys.exit(1)
if run.code is None:
echo(
f"❌ Run **{runspec}** doesn't have a code package. Maybe it's a local run?"
)
sys.exit(1)
tar = run.code.tarball
members = []
for m in tar.getmembers():
if not any(m.name.startswith(x) for x in EXCLUSIONS):
members.append(m)
tmp = None
try:
tmp = TemporaryDirectory()
tar.extractall(tmp.name, members)
op(tmp.name, **op_args)
finally:
if tmp and os.path.exists(tmp.name):
shutil.rmtree(tmp.name)


@click.group()
def cli():
pass
Expand All @@ -124,6 +154,16 @@ def diff(metaflow_run=None):
run_op(metaflow_run, op_diff, {})


@cli.command()
@click.argument("source_run")
@click.argument("target_run")
def diff_runs(source_run, target_run):
"""
Show a 'git diff' between two Metaflow runs, e.g. HelloFlow/3 (source) and HelloFlow/4 (target)
"""
run_op_diff_runs(source_run, target_run)


@cli.command()
@click.argument("metaflow_run")
@click.option(
Expand All @@ -150,3 +190,7 @@ def patch(metaflow_run, file=None):
if file is None:
file = metaflow_run.lower().replace("/", "_") + ".patch"
run_op(metaflow_run, op_patch, {"dst": file})


if __name__ == "__main__":
cli()
82 changes: 82 additions & 0 deletions tests/test_metaflow_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os
import tempfile
import unittest
from subprocess import PIPE
from unittest.mock import MagicMock, patch

from metaflow_diff.metaflow_diff import (
EXCLUSIONS,
extract_code_package,
op_diff,
op_patch,
op_pull,
perform_diff,
run_op,
)


class TestMetaflowDiff(unittest.TestCase):

@patch("metaflow_diff.metaflow_diff.Run")
@patch("metaflow_diff.metaflow_diff.namespace")
def test_extract_code_package(self, mock_namespace, mock_run):
mock_run.return_value.code.tarball.getmembers.return_value = []
mock_run.return_value.code.tarball.extractall = MagicMock()
runspec = "HelloFlow/3"

with patch(
"tempfile.TemporaryDirectory", return_value=tempfile.TemporaryDirectory()
) as mock_tmp:
tmp = extract_code_package(runspec, EXCLUSIONS)

mock_namespace.assert_called_once_with(None)
mock_run.assert_called_once_with(runspec)
self.assertTrue(os.path.exists(tmp.name))

@patch("metaflow_diff.metaflow_diff.run")
def test_perform_diff(self, mock_run):
with tempfile.TemporaryDirectory() as source_dir, tempfile.TemporaryDirectory() as target_dir:
source_file = os.path.join(source_dir, "file.txt")
target_file = os.path.join(target_dir, "file.txt")
with open(source_file, "w") as f:
f.write("source content")
with open(target_file, "w") as f:
f.write("target content")

perform_diff(source_dir, target_dir)

mock_run.assert_called_once_with(
["git", "diff", "--no-index", source_file, target_file],
)

@patch("shutil.rmtree")
@patch("metaflow_diff.metaflow_diff.extract_code_package")
@patch("metaflow_diff.metaflow_diff.op_diff")
def test_run_op(self, mock_op_diff, mock_extract_code_package, mock_rmtree):
mock_tmp = tempfile.TemporaryDirectory()
mock_extract_code_package.return_value = mock_tmp
runspec = "HelloFlow/3"

run_op(runspec, mock_op_diff, {})

mock_extract_code_package.assert_called_once_with(runspec, EXCLUSIONS)
mock_op_diff.assert_called_once_with(mock_tmp.name)
mock_rmtree.assert_called_once_with(mock_tmp.name)

@patch("metaflow_diff.metaflow_diff.perform_diff")
def test_op_patch(self, mock_perform_diff):
mock_perform_diff.return_value = ["diff --git a/file.txt b/file.txt\n"]

with tempfile.TemporaryDirectory() as tmpdir:
patch_file = os.path.join(tmpdir, "patch.patch")

op_patch(tmpdir, patch_file)

mock_perform_diff.assert_called_once_with(tmpdir, os.getcwd(), output=True)
with open(patch_file, "r") as f:
content = f.read()
self.assertIn("diff --git a/file.txt b/file.txt\n", content)


if __name__ == "__main__":
unittest.main()