Merge pull request #713 from koordinates/files-patches
Support patches to attached files
olsen232 committed Sep 16, 2022
2 parents 71e55e1 + 52501f4 commit 91e87cd
Showing 8 changed files with 191 additions and 52 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -12,7 +12,7 @@ _When adding new entries to the changelog, please include issue/PR numbers where
- Bugfix: directing geojson diff output to the console didn't work in a multi-dataset repo, even if only one dataset had changed. [#702](https://github.com/koordinates/kart/issues/702)
- Improve argument parsing for `kart diff` and `kart show`. [#706](https://github.com/koordinates/kart/issues/706)
- Bugfix: don't allow `kart merge` to fast-forward if the user specifies a merge message. [#705](https://github.com/koordinates/kart/issues/705)
- Kart now shows diffs to attached files that have been created using `kart commit-files`, if any [#583](https://github.com/koordinates/kart/issues/583)
- `diff`, `show`, `create-patch` and `apply` now handle attached files (as could already be created using `kart commit-files`) [#583](https://github.com/koordinates/kart/issues/583)

## 0.11.5

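As a rough illustration of the round trip described by the changelog entry above (a sketch only, written in the style of this repository's own tests; `cli_runner` is the test fixture used in tests/test_apply.py further down, and the patch contents are simply whatever `create-patch` emits):

# Create a patch from a commit that touched attached files, then re-apply it.
r = cli_runner.invoke(["create-patch", "HEAD"])
assert r.exit_code == 0, r.stderr
patch_text = r.stdout  # kart.patch/v1 JSON, including any "<files>" section

r = cli_runner.invoke(["apply", "-"], input=patch_text)  # "-" reads the patch from stdin
assert r.exit_code == 0, r.stderr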
140 changes: 94 additions & 46 deletions kart/apply.py
@@ -8,19 +8,27 @@

from kart.completion_shared import ref_completer

from .diff_structs import Delta, DeltaDiff, KeyValue, RepoDiff
from .exceptions import (
from kart.cli_util import KartCommand
from kart.diff_structs import (
FILES_KEY,
KeyValue,
Delta,
DeltaDiff,
DatasetDiff,
RepoDiff,
)
from kart.exceptions import (
NO_TABLE,
NO_WORKING_COPY,
PATCH_DOES_NOT_APPLY,
InvalidOperation,
NotFound,
NotYetImplemented,
)
from .geometry import hex_wkb_to_gpkg_geom
from .schema import Schema
from .timestamps import iso8601_tz_to_timedelta, iso8601_utc_to_datetime
from kart.cli_util import KartCommand
from kart.geometry import hex_wkb_to_gpkg_geom
from kart.schema import Schema
from kart.serialise_util import b64decode_str, ensure_bytes
from kart.timestamps import iso8601_tz_to_timedelta, iso8601_utc_to_datetime

V1_NO_META_UPDATE = (
"Sorry, patches that make meta changes are not supported until Datasets V2\n"
@@ -41,11 +49,11 @@ class MetaChangeType(Enum):
META_UPDATE = auto()


def _meta_change_type(ds_diff_dict):
meta_diff = ds_diff_dict.get("meta", {})
if not meta_diff:
def _meta_change_type(ds_diff_input):
meta_diff_input = ds_diff_input.get("meta", {})
if not meta_diff_input:
return None
schema_diff = meta_diff.get("schema.json", {})
schema_diff = meta_diff_input.get("schema.json", {})
if "+" in schema_diff and "-" not in schema_diff:
return MetaChangeType.CREATE_DATASET
elif "-" in schema_diff and "+" not in schema_diff:
@@ -124,7 +132,7 @@ def parse(self, f):
)


class DeltaParser:
class FeatureDeltaParser:
"""Parses JSON for a delta - ie {"-": old-value, "+": new-value} - into a Delta object."""

def __init__(self, old_schema, new_schema, *, allow_minimal_updates=False):
@@ -178,6 +186,61 @@ def _build_signature(patch_metadata, person, repo):
return repo.author_signature(**signature)


def parse_file_diff(file_diff_input, allow_minimal_updates=None):
def convert_half_delta(half_delta):
if half_delta is None:
return None
val = half_delta.value
if val.startswith("base64:"):
return (half_delta.key, b64decode_str(val))
if val.startswith("text:"):
val = val[5:] # len("text:") = 5
return (half_delta.key, ensure_bytes(val))

def convert_delta(delta):
return Delta(convert_half_delta(delta.old), convert_half_delta(delta.new))

delta_diff = DeltaDiff(
convert_delta(
Delta.from_key_and_plus_minus_dict(
k, v, allow_minimal_updates=allow_minimal_updates
)
)
for (k, v) in file_diff_input.items()
)
return DatasetDiff([(FILES_KEY, delta_diff)])


def parse_meta_diff(meta_diff_input, allow_minimal_updates=False):
return DeltaDiff(
Delta.from_key_and_plus_minus_dict(
k, v, allow_minimal_updates=allow_minimal_updates
)
for (k, v) in meta_diff_input.items()
)


def parse_feature_diff(
feature_diff_input, dataset, meta_diff, allow_minimal_updates=False
):
old_schema = new_schema = None
if dataset is not None:
old_schema = new_schema = dataset.schema

schema_delta = meta_diff.get("schema.json") if meta_diff else None
if schema_delta and schema_delta.old_value:
old_schema = Schema.from_column_dicts(schema_delta.old_value)
if schema_delta and schema_delta.new_value:
new_schema = Schema.from_column_dicts(schema_delta.new_value)

delta_parser = FeatureDeltaParser(
old_schema,
new_schema,
allow_minimal_updates=allow_minimal_updates,
)
return DeltaDiff((delta_parser.parse(change) for change in feature_diff_input))


def apply_patch(
*,
repo,
@@ -192,10 +255,10 @@ def apply_patch(
except json.JSONDecodeError as e:
raise click.FileError("Failed to parse JSON patch file") from e

json_diff = patch.get("kart.diff/v1+hexwkb")
if json_diff is None:
json_diff = patch.get("sno.diff/v1+hexwkb")
if json_diff is None:
diff_input = patch.get("kart.diff/v1+hexwkb")
if diff_input is None:
diff_input = patch.get("sno.diff/v1+hexwkb")
if diff_input is None:
raise click.FileError(
"Failed to parse JSON patch file: patch contains no `kart.diff/v1+hexwkb` object"
)
@@ -208,7 +271,7 @@
raise click.UsageError("Patch contains no author or head information")

resolve_missing_values_from_rs = None
if "base" in metadata:
if metadata.get("base") is not None:
# if the patch has a `base` that's present in this repo,
# then we allow the `-` blobs to be missing, because we can resolve the `-` blobs
# from that revision.
@@ -230,6 +293,8 @@
except KeyError:
raise NotFound(f"No such ref {ref}")

allow_minimal_updates = bool(resolve_missing_values_from_rs)

rs = repo.structure(ref)
# TODO: this code shouldn't special-case tabular working copies
# Specifically, we need to check if those part(s) of the WC exists which the patch applies to.
@@ -241,46 +306,29 @@
repo.working_copy.check_not_dirty()

repo_diff = RepoDiff()
for ds_path, ds_diff_dict in json_diff.items():
for ds_path, ds_diff_input in diff_input.items():
if ds_path == FILES_KEY:
repo_diff[FILES_KEY] = parse_file_diff(ds_diff_input, allow_minimal_updates)
continue

dataset = rs.datasets().get(ds_path)
meta_change_type = _meta_change_type(ds_diff_dict)
meta_change_type = _meta_change_type(ds_diff_input)
check_change_supported(
repo.table_dataset_version, dataset, ds_path, meta_change_type, do_commit
)

meta_changes = ds_diff_dict.get("meta", {})
meta_diff_input = ds_diff_input.get("meta", {})

if meta_changes:
allow_minimal_updates = bool(resolve_missing_values_from_rs)
meta_diff = DeltaDiff(
Delta.from_key_and_plus_minus_dict(
k, v, allow_minimal_updates=allow_minimal_updates
)
for (k, v) in meta_changes.items()
)
if meta_diff_input:
meta_diff = parse_meta_diff(meta_diff_input, allow_minimal_updates)
repo_diff.recursive_set([ds_path, "meta"], meta_diff)
else:
meta_diff = None

feature_changes = ds_diff_dict.get("feature", [])
if feature_changes:
old_schema = new_schema = None
if dataset is not None:
old_schema = new_schema = dataset.schema

schema_delta = meta_diff.get("schema.json") if meta_diff else None
if schema_delta and schema_delta.old_value:
old_schema = Schema.from_column_dicts(schema_delta.old_value)
if schema_delta and schema_delta.new_value:
new_schema = Schema.from_column_dicts(schema_delta.new_value)

delta_parser = DeltaParser(
old_schema,
new_schema,
allow_minimal_updates=bool(resolve_missing_values_from_rs),
)
feature_diff = DeltaDiff(
(delta_parser.parse(change) for change in feature_changes)
feature_diff_input = ds_diff_input.get("feature", [])
if feature_diff_input:
feature_diff = parse_feature_diff(
feature_diff_input, dataset, meta_diff, allow_minimal_updates
)
repo_diff.recursive_set([ds_path, "feature"], feature_diff)

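For orientation, here is a minimal standalone sketch of the value decoding that parse_file_diff performs on attached-file patches, using only the standard library rather than kart.serialise_util (the function name is illustrative; the real code returns (key, bytes) pairs wrapped in Delta objects):

import base64

def decode_file_value(val):
    # A "<files>" value is either base64-encoded binary data or (optionally prefixed) text;
    # both decode to the bytes that end up stored as a blob.
    if val.startswith("base64:"):
        return base64.b64decode(val[len("base64:"):])
    if val.startswith("text:"):
        val = val[len("text:"):]
    return val.encode("utf-8")

decode_file_value("text:CC-BY\n")     # -> b"CC-BY\n"
decode_file_value("base64:aGVsbG8=")  # -> b"hello"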
13 changes: 11 additions & 2 deletions kart/base_diff_writer.py
@@ -278,6 +278,11 @@ def write_file_diff(self, file_diff):
"""For outputting file_diff - all the files that have changed, without reference to any dataset."""
raise NotImplementedError()

BASE64_PREFIX = "base64:"
# Not having a text-prefix looks nicer but is slightly ambiguous in certain circumstances.
# Subclasses can override if ambiguity would be bad.
TEXT_PREFIX = ""

def _full_file_delta(self, delta, skip_binary_files=False):
def get_blob(half_delta):
return self.repo[half_delta.value] if half_delta else None
@@ -294,9 +299,13 @@ def is_binary(blob):
if skip_binary_files:
return delta

blob_to_text = lambda blob: "base64:" + b64encode_str(memoryview(blob))
blob_to_text = lambda blob: self.BASE64_PREFIX + b64encode_str(
memoryview(blob)
)
else:
blob_to_text = lambda blob: str(memoryview(blob), "utf-8")
blob_to_text = lambda blob: self.TEXT_PREFIX + str(
memoryview(blob), "utf-8"
)

old_half_delta = (delta.old_key, blob_to_text(old_blob)) if delta.old else None
new_half_delta = (delta.new_key, blob_to_text(new_blob)) if delta.new else None
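The writer side mirrors that decoding; a simplified sketch of what the two blob_to_text lambdas above produce, using the prefixes defined in this diff (standard library only, function name illustrative):

import base64

def blob_as_patch_text(data: bytes, is_binary: bool, text_prefix: str = "") -> str:
    # Binary blobs always get the unambiguous "base64:" prefix; text blobs get
    # TEXT_PREFIX, which defaults to "" here and is overridden to "text:" by PatchWriter.
    if is_binary:
        return "base64:" + base64.b64encode(data).decode("ascii")
    return text_prefix + data.decode("utf-8")

blob_as_patch_text(b"CC-BY\n", is_binary=False, text_prefix="text:")  # -> "text:CC-BY\n"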
6 changes: 5 additions & 1 deletion kart/json_diff_writers.py
@@ -16,7 +16,6 @@
from kart.diff_structs import FILES_KEY, BINARY_FILE, DatasetDiff
from kart.log import commit_obj_to_json
from kart.output_util import dump_json_output, resolve_output_path
from kart.serialise_util import b64encode_str
from kart.tabular.feature_output import feature_as_geojson, feature_as_json
from kart.timestamps import datetime_to_iso8601_utc, timedelta_to_iso8601_tz

@@ -148,6 +147,9 @@ class PatchWriter(JsonDiffWriter):
- it is at the key "kart.patch/v1" instead of "kart.show/v1"
"""

# Avoid any ambiguity in file-patches.
TEXT_PREFIX = "text:"

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.nonmatching_item_counts = {p: 0 for p in self.all_ds_paths}
@@ -171,6 +173,8 @@ def add_json_header(self, obj):
"message": self.commit.message,
"base": original_parent,
}
if not original_parent:
del obj["kart.patch/v1"]["base"]

def record_spatial_filter_stat(
self, ds_path, item_type, key, delta, old_match_result, new_match_result
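The effect of the two PatchWriter changes is easiest to see in the emitted header; a sketch of the kart.patch/v1 object with and without a parent commit (field values are illustrative, not taken from a real patch):

# Commit with a parent: the patch records its base, so apply can resolve missing "-" blobs.
{"kart.patch/v1": {"authorName": "...", "authorEmail": "...", "message": "...", "base": "<parent commit id>"}}

# Root commit: original_parent is None, so the "base" key is dropped entirely,
# matching the `metadata.get("base") is not None` check added in kart/apply.py.
{"kart.patch/v1": {"authorName": "...", "authorEmail": "...", "message": "..."}}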
34 changes: 34 additions & 0 deletions kart/structure.py
@@ -7,6 +7,7 @@
import click
import pygit2

from kart.diff_structs import FILES_KEY
from .exceptions import (
NO_CHANGES,
NO_COMMIT,
@@ -235,6 +236,14 @@ def create_tree_from_diff(
object_builder.insert(path, blob)

for ds_path, ds_diff in repo_diff.items():
if ds_path == FILES_KEY:
self.apply_files_diff(
ds_diff.get(FILES_KEY),
object_builder,
resolve_missing_values_from_rs=resolve_missing_values_from_rs,
)
continue

schema_delta = ds_diff.recursive_get(["meta", "schema.json"])
if schema_delta and self.repo.table_dataset_version < 2:
# This should have been handled already, but just to be safe.
@@ -275,10 +284,14 @@ def create_tree_from_diff(
return tree

def check_values_match_schema(self, repo_diff):
# TODO - checking data validity within datasets should be delegated to the datasets class.
all_features_valid = True
violations = {}

for ds_path, ds_diff in repo_diff.items():
if ds_path == FILES_KEY:
continue

ds_violations = {}
violations[ds_path] = ds_violations

@@ -323,6 +336,27 @@ def check_values_match_schema(self, repo_diff):
exit_code=SCHEMA_VIOLATION,
)

def apply_files_diff(
self, file_diff, object_builder, resolve_missing_values_from_rs=None
):
if not file_diff:
return

for path, delta in file_diff.items():
if DATASET_PATH_PATTERN.search(path):
raise InvalidOperation(
f"Applying <files> diff shouldn't change the contents of a dataset:\n{path}",
exit_code=SCHEMA_VIOLATION,
)

# TODO - check for conflicts.

assert isinstance(delta.new_value, (bytes, type(None)))
if delta.new_value is not None:
object_builder.insert(path, delta.new_value)
else:
object_builder.remove(path)

def commit_diff(
self,
wcdiff,
19 changes: 19 additions & 0 deletions tests/data/patches/points-attach-files.kartpatch
@@ -0,0 +1,19 @@
{
"kart.patch/v1": {
"authorName": "Andrew Olsen",
"authorEmail": "andrew.olsen@koordinates.com",
"authorTime": "2022-06-11T11:03:58Z",
"authorTimeOffset": "+12:00",
"message": "Add attachments to nz_pa_points_topo_150k"
},
"kart.diff/v1+hexwkb": {
"<files>": {
"LICENSE.txt": {
"+": "text:NZ Pa Points (Topo, 1:50k)\nhttps://data.linz.govt.nz/layer/50308-nz-pa-points-topo-150k/\nLand Information New Zealand\nCC-BY\n"
},
"logo.png": {
"+": "base64:iVBORw0KGgoAAAANSUhEUgAAAE4AAAAfCAYAAAE-f8vZAAAABGdBTUEAALGPC_xhBQAAAUJpQ0NQSUNDIHByb2ZpbGUAACiRY2BgEkgsKMhhYWBgyM0rKQpyd1KIiIxSYH_GwMwgwyDCoMygk5hcXOAYEOADVMIAo1HBt2sMjCD6si7IrIpVRT1lCb1JrMd0D8iFH0_GVI8CuFJSi0Fq_gBxUnJBUQkDA2MCkK1cXlIAYrcA2SJFQEcB2TNA7HQIew2InQRhHwCrCQlyBrKvANkCyRmJKUD2EyBbJwlJPB2JDbUXBDhCjSxcTS0NCDiVdFCSWlECop3zCyqLMtMzShQcgSGUquCZl6yno2BkYGTEwAAKb4jqz2LgcGQUO4UQy37EwGBpzcDA9BkhlhDKwLA1hoGBVxshpjWfgUEwk4HhMH9BYlEi3AGM31iK04yNIGyeIgYG1h___3-WZWBg38XA8Lfo___fc____7uEgYH5JgPDgUIAIP1c7ohagCoAAAAHdElNRQfmCQYWHxDs9K6qAAAIkElEQVRo3t1YeVCV1xU_995vfewBjRsSRdNiqEixpAZEFrfWPzRJTWJGa2JVMBlrldia1DZjxyzuJu6aqGNcGOOkNUm1BRMhbogaS6pgWtxqFESesvm9b723fxCQ93ir4MuMZ-b98e5377ln_Z1zLsD3ZNs4sgxciMRHgbwm-xMAAAQAYNsymrV-VGYUIgAAefmISShC3N26juS12VVI5OLBDVlnb4Zq68vv2raMZh45AgKwbb63DgAA8prsv4izk2NcOcprcz7DrVe359aeY_t1andkdNjoekjIGxzHpfS44rSR1ipD1D8eLXcnd4splmZkoCipxB1HdzfZNo_SACHBaZGyJiW3KBzJ72dvQjI3EwIkppo7HbO_nAIAYFuXcxQEkgYAwNHLDQvJoGiPDI2_VT3a_j8_fkANIEBI4iaDjUwBxYJWZsCY3qaSbdMoBTCS2zS40dxXffP4NXeXIACQPXiJtC4Is5JCxBmDv-LG9kswD1wu9KWyvHzEM_yEgXu4EX2umkVXq9zGa5t9mo1ZjrmHN7p1iJv9yoxChGybRxuAgHN3SF1RhuiFes-MTKoChyXwFKX-UvswauWBoQuJ8_aRVjePt07WlDqFyIQBNwEA5BUjXnbkl2yTV2ctaFNTfCM1mvSLrPOlSrswugMYRXYEDfY_rL1dZjcr7WGuQnlKLSW3KIqp5k4nzyvGaiWvKM7pgPja0EjyeFQNICR60px-1xSvLjpxJVB74v6RIM5N2YskMtFLum40tp-fZZ652ZYgIP7hZ93IgKjaQC6jt5Sfqm8cPetzIwKwbRxVDxhF-M3cYteVvKI-SJyb0oMMiq6-n-hid9Rs_cNzh73tEfOHUkCAnBNWf9GRX7IHaEvWyCsy56FwYYUzc2Z4LTwPgswTN0L0recU13Uhd3BfbmiPq06hAAwOQRCJJHff6jbHk7rtcQfK4A3ivJE7-HMD0E49grMZqQYcdpt81O4Y3hYLnqqr21hrNnIdcw9vDkQR26ZRKmCPKNA-GeqVvKKoNst18P8rSTJJiJ6HRJIDCGGmWcetirql-vry-q5wrbwsYxwI3FQkkjimmZeYYn6ovn7kkFu3ui6I7w4fi0L4mUBwf6DsGqjmDn1t-cfWlc7LJrw65BGSEP064nBGS07SQ9a5W0v0Td80eRVOXpP9HpK433p0p2Z9ou-qfNY6cSNgoaRFT8XiXqFXPBZLxnTrwu2e2soztzsIZ9s46g4QNzWpAxMwlZmFfEBuXJX5EgoVtvkF7o3aL9X8koP3snXDyMvA4cf8Lw_srvLqF6FgUn8sNgj3Cj0fiDLWlYae2lsna7C0OC01IMEAADAKkZdmvOyzcsWI4FEwxigwcNtYkbiI6y2dl0unGghpy08jb9_5lwYNwzG24y5Wb9Y--CbcOnWTAQAI0xM5LrWnAxBywlnramPf-24L75f0D_6NzZPVTnfyvxpI-DH9TJcOZRcOpmBg0NuuggEAGPv-awEDw2U5O7jCIUDemyuX0A6qcByOIokxHYQQfpPIuanthzqVEK7zYoesS-k-BMeG_9MlIRTjwMUwY_8l-v2kx3PJ3ZUOCXG5IRZJi9OG4kdDTgXcaDbp0xzzirf5ghL5nRHMA5iDRyczsJSZhRxWFx47DRatCkgyypod-cU-EZ_VaUBr7g7yL8LaW62-V1vMKXmHBoLF7P6ZjOlKblEY-AlA6p-OVbJGbbLfHmnQcrR3ymqdEkLJK4phDnOZj8eMAvXdU2KgIeDIL9llXW3sDQwsL95QzQp7pOO1ki-9GldakpGJJDIDidwAplvXQLN2qPNLPu0KtBbyBoeTxG7zkUiGA2OUadZh64J9ub6u3BGA51vMKryV_mMUJsxBAnkOCHrEj3h0MJMeYI36aqu0-pixvyqoFait6oyOQySr75MoXJiLCB4HBIX404Uzw_qYNeqr9N2VlfS83X9QBgCQFqcl42i5ADj8eKc1oOw2a9SnOeaX7A-GwaQlw3-BI6TtQFD3zo-x9CJt0F5UFxwp82g4HBsGQl5SKo6RCwN6OPAbIcBijdpUY99_dpml1V3KmvQJB2HOkIkoQtzZ4bW1K4iyu_S2Otb4qOKoVWG_Zzh-cgLinuy5HUncr4PQM522KuvStDX_0rskJackcPyw3sXA47QHLTpTrb1WRd0L-oZyhsmwXsD9vNfeoBgNAIDHQ0lCTDmXFct3lhUGQvhhvU_7ZTQGlOnWP-gd9Vlae7ePVWGPprVKHKvXJoNBT_rVikrkOfJEzOf8hAEIySszp6IwYXvQ0dukVYyya53qqQnqDcQHDjOwaL06MSQn_q_2lG0-Ruus36FQfqWvosma9Txk2zDyCHA4HR5SonbHMHXBkVK_3yVWZ01CIfxuH3DzNWYm7fOwGg0sellfcOR0QBB88NJBYKB5jTjK-mPE4e8eWsMR3E94Oz01IAge028cIBB9QMRFzBRj0w-iFGMGUKZ26seYz8qMY2xfySsyX_Dz-XA-ChM-8im6Zm1G5CcxIOQmFSCRPB_EZ5wL-t5vB5vF14zOVlVpfdYZ4HGSf6OsVQwOcwsz6XFW52hGUVIUkrlMJHGvAI-T_fK3Zn1qfn5xQksf9_yPgEvvvQVJ3PQgGO2UVVmX3mV93Ph4jh_zWDHw5MH3cZq1y_y6Zoqx9Ty7V3ZFAOnP6ck4Wv4CCIp6AJODyerVKfr2igKroq5LWWNZAmFR6tM4Uix4QJNDA7U7xurrykvp9Sb3syo_PRFIXEQijpYLgMdPdL6ysVrWoE1L-f2-vx_tgvHR56y6LGMkDhN3AEE9u6DX_JbaHZPUhcfOBvQ6ggBAXPRUPIoUZyORmwQEdfN1BihrYgb9jDXp75v7q8rM0uof5nUkKxaRnL7JKFKagzg8AQgK96N9ucV0upfdUVfpb566SMEzBP8f5MbbPjlAJfcAAAAASUVORK5CYII="
}
}
}
}
27 changes: 26 additions & 1 deletion tests/test_apply.py
@@ -1,5 +1,4 @@
import json
import tempfile
from contextlib import contextmanager
from pathlib import Path

@@ -756,3 +755,29 @@ def _benchmark_apply(*args, **kwargs):
monkeypatch.setattr(apply, "apply_patch", _benchmark_apply)

cli_runner.invoke(["apply", "-"], input=patch_text)


def test_apply_attach_files(data_working_copy, cli_runner):
patch_filename = "points-attach-files.kartpatch"
with data_working_copy("points") as (repo_dir, wc_path):
patch_path = patches / patch_filename
r = cli_runner.invoke(["apply", patch_path])
assert r.exit_code == 0, r.stderr

r = cli_runner.invoke(["show", "--diff-files"])
assert r.exit_code == 0, r.stderr
assert r.stdout.splitlines()[1:] == [
"Author: Andrew Olsen <andrew.olsen@koordinates.com>",
"Date: Sat Jun 11 23:03:58 2022 +1200",
"",
" Add attachments to nz_pa_points_topo_150k",
"",
"+++ LICENSE.txt",
"+ NZ Pa Points (Topo, 1:50k)",
"+ https://data.linz.govt.nz/layer/50308-nz-pa-points-topo-150k/",
"+ Land Information New Zealand",
"+ CC-BY",
"+ ",
"+++ logo.png",
"+ (binary file f8555b6)",
]
