Skip to content

Commit

Permalink
Merge pull request #699 from koordinates/filtered-diffs
Browse files Browse the repository at this point in the history
Spatial filtered PC diffs
  • Loading branch information
olsen232 committed Aug 23, 2022
2 parents 55484dc + e0ca8a5 commit 70f980b
Show file tree
Hide file tree
Showing 16 changed files with 449 additions and 177 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ Please note that compatibility for 0.x releases (software or repositories) isn't

_When adding new entries to the changelog, please include issue/PR numbers wherever possible._

## 0.11.6 (UNRELEASED)

- "Primary key conflicts" - conflicts caused by reusing a primary key which is already assigned to a feature outside the spatial filter - are renamed to "spatial filter conflicts" for future proofing with other dataset types. Consequently, commit option `--allow-pk-conflicts` is renamed to `--allow-spatial-filter-conflicts`.

## 0.11.5

### Major changes
Expand Down
4 changes: 2 additions & 2 deletions docs/pages/command_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -526,10 +526,10 @@ if you force Kart to commit it anyway, then that feature will be
overwritten.

Kart will warn about these conflicts when running ``kart status`` or
``kart diff``. They are called "primary key conflicts". Kart attempts to
``kart diff``. They are called "spatial filter conflicts". Kart attempts to
help you avoid them by setting up the working copy so that the next
primary key in the sequence that is chosen by default will not conflict
with any existing features. If you do accidentally create primary key
with any existing features. If you do accidentally create spatial filter
conflicts, the appropriate fix is to reassign the conflicting features
new primary key values that are not used elsewhere.

Expand Down
3 changes: 3 additions & 0 deletions kart/base_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ class BaseDataset(DatasetDiffMixin, metaclass=BaseDatasetMetaClass):
DATASET_DIRNAME = None # Example: ".hologram-dataset.v1".
# (This should match the pattern DATASET_DIRNAME_PATTERN in kart.structure.)

# The name / type of main items that make up this dataset - ie, not the meta items.
ITEM_TYPE = None

WORKING_COPY_PART_TYPE = None # Example: working_copy.PartType.TABULAR

# Paths are generally relative to self.inner_tree, but datasets may choose to put extra data in the outer
Expand Down
238 changes: 182 additions & 56 deletions kart/base_diff_writer.py

Large diffs are not rendered by default.

28 changes: 16 additions & 12 deletions kart/commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .core import check_git_user
from .exceptions import (
NO_CHANGES,
SPATIAL_FILTER_PK_CONFLICT,
SPATIAL_FILTER_CONFLICT,
InvalidOperation,
NotFound,
SubprocessError,
Expand All @@ -40,11 +40,11 @@ def __init__(self, *args, **kwargs):

if not self.spatial_filter.match_all:
self.record_spatial_filter_stats = True
self.spatial_filter_pk_conflicts = RepoKeyFilter()
self.spatial_filter_conflicts = RepoKeyFilter()
self.now_outside_spatial_filter = RepoKeyFilter()
else:
self.record_spatial_filter_stats = False
self.spatial_filter_pk_conflicts = None
self.spatial_filter_conflicts = None
self.now_outside_spatial_filter = None

def get_repo_diff(self):
Expand All @@ -57,14 +57,14 @@ def get_repo_diff(self):
return repo_diff

def record_spatial_filter_stat(
self, ds_path, key, delta, old_match_result, new_match_result
self, ds_path, item_type, key, delta, old_match_result, new_match_result
):
super().record_spatial_filter_stat(
ds_path, key, delta, old_match_result, new_match_result
ds_path, item_type, key, delta, old_match_result, new_match_result
)
if delta.new is not None and not new_match_result:
self.now_outside_spatial_filter.recursive_set(
[ds_path, "feature", key], True
[ds_path, item_type, key], True
)


Expand All @@ -90,7 +90,7 @@ def record_spatial_filter_stat(
),
)
@click.option(
"--allow-pk-conflicts",
"--allow-spatial-filter-conflicts",
is_flag=True,
default=False,
help=(
Expand Down Expand Up @@ -123,7 +123,7 @@ def commit(
ctx,
message,
allow_empty,
allow_pk_conflicts,
allow_spatial_filter_conflicts,
convert_to_dataset_format,
output_format,
filters,
Expand All @@ -147,13 +147,17 @@ def commit(
if not wc_diff and not allow_empty:
raise NotFound("No changes to commit", exit_code=NO_CHANGES)

pk_conflicts = commit_diff_writer.spatial_filter_pk_conflicts
if not allow_pk_conflicts and pk_conflicts and any(pk_conflicts.values()):
sf_conflicts = commit_diff_writer.spatial_filter_conflicts
if (
not allow_spatial_filter_conflicts
and sf_conflicts
and any(sf_conflicts.values())
):
commit_diff_writer.write_warnings_footer()
raise InvalidOperation(
"Aborting commit due to conflicting primary key values - use --allow-pk-conflicts to commit anyway "
"Aborting commit due to conflicting primary key values - use --allow-spatial-filter-conflicts to commit anyway "
"(this will overwrite some existing features that are outside of the current spatial filter)",
exit_code=SPATIAL_FILTER_PK_CONFLICT,
exit_code=SPATIAL_FILTER_CONFLICT,
)

do_json = output_format == "json"
Expand Down
2 changes: 1 addition & 1 deletion kart/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
UNSUPPORTED_VERSION = 24
CRS_ERROR = 25
GEOMETRY_ERROR = 26
SPATIAL_FILTER_PK_CONFLICT = 27
SPATIAL_FILTER_CONFLICT = 27
INVALID_FILE_FORMAT = 28
UNCOMMITTED_CHANGES = 29
# Ran out of 2x numbers. Oh well.
Expand Down
8 changes: 3 additions & 5 deletions kart/html_diff_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ def write_diff(self):
all_datasets_geojson = {
ds_path: {
"type": "FeatureCollection",
"features": self.filtered_ds_feature_deltas_as_geojson(
ds_path, ds_diff
),
"features": self.filtered_dataset_deltas_as_geojson(ds_path, ds_diff),
}
for ds_path, ds_diff in repo_diff.items()
}
Expand All @@ -70,6 +68,6 @@ def write_diff(self):
self.write_warnings_footer()


HtmlDiffWriter.filtered_ds_feature_deltas_as_geojson = (
GeojsonDiffWriter.filtered_ds_feature_deltas_as_geojson
HtmlDiffWriter.filtered_dataset_deltas_as_geojson = (
GeojsonDiffWriter.filtered_dataset_deltas_as_geojson
)
80 changes: 39 additions & 41 deletions kart/json_diff_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,25 +95,23 @@ def default(self, obj):
key: self._postprocess_simple_delta(value)
for key, value in ds_diff["meta"].items()
}
if "feature" in ds_diff:
result["feature"] = self.filtered_ds_feature_deltas_as_json(
item_type = self._get_old_or_new_dataset(ds_path).ITEM_TYPE
if item_type and item_type in ds_diff:
result[item_type] = self.filtered_dataset_deltas_as_json(
ds_path, ds_diff
)
if "tile" in ds_diff:
result["tile"] = [
self._postprocess_simple_delta(value)
for key, value in ds_diff["tile"].items()
]
return result

return None

def filtered_ds_feature_deltas_as_json(self, ds_path, ds_diff):
if "feature" not in ds_diff:
def filtered_dataset_deltas_as_json(self, ds_path, ds_diff):
item_type = self._get_old_or_new_dataset(ds_path).ITEM_TYPE
if not item_type or item_type not in ds_diff:
return

old_transform, new_transform = self.get_geometry_transforms(ds_path, ds_diff)
for key, delta in self.filtered_ds_feature_deltas(ds_path, ds_diff):

for key, delta in self.filtered_dataset_deltas(ds_path, ds_diff):
delta_as_json = {}

if delta.old:
Expand Down Expand Up @@ -144,7 +142,7 @@ class PatchWriter(JsonDiffWriter):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.nonmatching_feature_counts = {p: 0 for p in self.all_ds_paths}
self.nonmatching_item_counts = {p: 0 for p in self.all_ds_paths}

def add_json_header(self, obj):
if self.commit is not None:
Expand All @@ -167,26 +165,29 @@ def add_json_header(self, obj):
}

def record_spatial_filter_stat(
self, ds_path, key, delta, old_match_result, new_match_result
self, ds_path, item_type, key, delta, old_match_result, new_match_result
):
"""
Records which / how many features were inside / outside the spatial filter for which reasons.
These records are used by write_warnings_footer to show warnings to the user.
"""
if not old_match_result and not new_match_result:
self.nonmatching_feature_counts[ds_path] += 1
self.nonmatching_item_counts[ds_path] += 1

def write_warnings_footer(self):
super().write_warnings_footer()
if any(self.nonmatching_feature_counts.values()):
if any(self.nonmatching_item_counts.values()):
click.secho(
"Warning: The generated patch does not contain the entire commit: ",
bold=True,
err=True,
)
for ds_path, count in self.nonmatching_feature_counts.items():
for ds_path, count in self.nonmatching_item_counts.items():
if not count:
continue
item_type = self._get_old_or_new_dataset(ds_path).ITEM_TYPE
click.echo(
f" In dataset {ds_path} there are {count} changed features not included due to spatial filter",
f" In dataset {ds_path} there are {count} changed {item_type}s not included due to spatial filter",
err=True,
)

Expand Down Expand Up @@ -313,8 +314,7 @@ def write_ds_diff(self, ds_path, ds_diff):
)

self.write_meta_deltas(ds_path, ds_diff)
self.write_feature_deltas(ds_path, ds_diff)
self.write_tile_deltas(ds_path, ds_diff)
self.write_filtered_dataset_deltas(ds_path, ds_diff)

def write_meta_deltas(self, ds_path, ds_diff):
if "meta" not in ds_diff:
Expand All @@ -326,13 +326,17 @@ def write_meta_deltas(self, ds_path, ds_diff):
obj["change"] = delta.to_plus_minus_dict()
self.dump(obj)

def write_feature_deltas(self, ds_path, ds_diff):
if "feature" not in ds_diff:
def write_filtered_dataset_deltas(self, ds_path, ds_diff):
item_type = self._get_old_or_new_dataset(ds_path).ITEM_TYPE
if not item_type or item_type not in ds_diff:
return

old_transform, new_transform = self.get_geometry_transforms(ds_path, ds_diff)
obj = {"type": "feature", "dataset": ds_path, "change": None}
for key, delta in self.filtered_ds_feature_deltas(ds_path, ds_diff):

obj = {"type": item_type, "dataset": ds_path, "change": None}

for key, delta in self.filtered_dataset_deltas(ds_path, ds_diff):
obj["type"] = item_type
change = {}
if delta.old:
change["-"] = feature_as_json(
Expand All @@ -345,18 +349,6 @@ def write_feature_deltas(self, ds_path, ds_diff):
obj["change"] = change
self.dump(obj)

def write_tile_deltas(self, ds_path, ds_diff):
if "tile" not in ds_diff:
return

obj = {"type": "tile", "dataset": ds_path, "change": None}
for key, delta in ds_diff["tile"].sorted_items():
change = delta.to_plus_minus_dict()
for char in change:
change[char] = {"name": key, **change[char]}
obj["change"] = change
self.dump(obj)

def write_warnings_footer(self):
# If there's an estimate thread running (see write_header()), ask it to terminate
terminate_estimate_thread.set()
Expand Down Expand Up @@ -418,13 +410,11 @@ def write_diff(self):
if not ds_diff:
continue

self._warn_about_any_meta_diffs(ds_path, ds_diff)
self._warn_about_any_non_feature_diffs(ds_path, ds_diff)
has_changes = True
output_obj = {
"type": "FeatureCollection",
"features": self.filtered_ds_feature_deltas_as_geojson(
ds_path, ds_diff
),
"features": self.filtered_dataset_deltas_as_geojson(ds_path, ds_diff),
}

if self.output_path == "-":
Expand All @@ -440,23 +430,31 @@ def write_diff(self):
self.has_changes = has_changes
self.write_warnings_footer()

def _warn_about_any_meta_diffs(self, ds_path: str, ds_diff: DatasetDiff) -> None:
def _warn_about_any_non_feature_diffs(
self, ds_path: str, ds_diff: DatasetDiff
) -> None:
if "meta" in ds_diff:
meta_changes = ", ".join(ds_diff["meta"].keys())
click.echo(
f"Warning: {ds_path} meta changes aren't included in GeoJSON output: {meta_changes}",
err=True,
)
if "tile" in ds_diff:
count = len(ds_diff["tile"])
click.echo(
f"Warning: {count} tile changes in {ds_path} aren't included in GeoJSON output",
err=True,
)

def filtered_ds_feature_deltas_as_geojson(
def filtered_dataset_deltas_as_geojson(
self, ds_path: str, ds_diff: DatasetDiff
) -> Union[None, dict]:
if "feature" not in ds_diff:
return

old_transform, new_transform = self.get_geometry_transforms(ds_path, ds_diff)

for key, delta in self.filtered_ds_feature_deltas(ds_path, ds_diff):
for key, delta in self.filtered_dataset_deltas(ds_path, ds_diff):
if delta.old:
change_type = "U-" if delta.new else "D"
yield feature_as_geojson(
Expand Down
5 changes: 4 additions & 1 deletion kart/point_cloud/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from kart.base_dataset import BaseDataset, MetaItemDefinition, MetaItemFileType
from kart.core import find_blobs_in_tree
from kart.decorators import allow_classmethod
from kart.diff_structs import DatasetDiff, DeltaDiff, Delta, KeyValue
from kart.diff_structs import DatasetDiff, DeltaDiff, Delta, KeyValue, WORKING_COPY_EDIT
from kart.key_filters import DatasetKeyFilter, FeatureKeyFilter
from kart.list_of_conflicts import ListOfConflicts, InvalidNewValue
from kart.lfs_util import (
Expand Down Expand Up @@ -41,6 +41,8 @@ class PointCloudV1(BaseDataset):
DATASET_TYPE = "point-cloud"
DATASET_DIRNAME = ".point-cloud-dataset.v1"

ITEM_TYPE = "tile"

WORKING_COPY_PART_TYPE = PartType.WORKDIR

# All relative paths should be relative to self.inner_tree - that is, to the tree named DATASET_DIRNAME.
Expand Down Expand Up @@ -281,6 +283,7 @@ def diff_to_working_copy(
new_half_delta = tilename, new_tile_summary

tile_delta = Delta(old_half_delta, new_half_delta)
tile_delta.flags = WORKING_COPY_EDIT
tile_diff[tilename] = tile_delta

if not tile_diff:
Expand Down

0 comments on commit 70f980b

Please sign in to comment.