Skip to content

Commit

Permalink
Add a tags.json meta item, require it to be a list of strings
Browse files Browse the repository at this point in the history
  • Loading branch information
olsen232 committed Sep 15, 2022
1 parent 5ce2f42 commit 71e55e1
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 39 deletions.
8 changes: 3 additions & 5 deletions kart/base_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,11 +403,9 @@ def _apply_meta_delta_to_tree(

# Actual implementation once we've figured out there's no conflict:
if new_value is not None:
if name.endswith(".json"):
new_value = json_pack(new_value)
else:
new_value = ensure_bytes(new_value)
object_builder.insert(name, new_value)
definition = self.get_meta_item_definition(name)
file_type = MetaItemFileType.get_from_definition_or_suffix(definition, name)
object_builder.insert(name, file_type.encode_to_bytes(delta.new_value))
else:
object_builder.remove(name)

Expand Down
2 changes: 2 additions & 0 deletions kart/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ def meta_set(ctx, message, dataset, items):
message = f"Update metadata for {dataset}"

def _parse(key, value):
if value == "":
return None
value = value_optionally_from_text_file(value, key, ctx, encoding="utf-8")
if key.endswith(".json"):
try:
Expand Down
28 changes: 28 additions & 0 deletions kart/meta_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,31 @@
from kart.serialise_util import ensure_text, ensure_bytes, json_pack, json_unpack


class TagsJsonFileType:
    # tags.json is checked on both read and write - it must be a JSON list of strings.
    def decode_from_bytes(self, data):
        """Decode raw tags.json bytes into a list of tag strings (None passes through)."""
        if data is None:
            return None
        return self.assert_list_of_strings(json_unpack(data))

    def encode_to_bytes(self, meta_item):
        """Encode a list of tag strings as canonical JSON bytes (None passes through)."""
        if meta_item is None:
            return None
        return json_pack(self.assert_list_of_strings(meta_item))

    def assert_list_of_strings(self, meta_item):
        """Return meta_item unchanged, raising AssertionError unless it is a list of strings."""
        # Raise explicitly rather than via `assert` statements so the validation
        # still runs when Python is invoked with -O (which strips asserts).
        if not isinstance(meta_item, list) or any(
            not isinstance(tag, str) for tag in meta_item
        ):
            raise AssertionError("tags.json should be a list of strings")
        return meta_item


TagsJsonFileType.INSTANCE = TagsJsonFileType()


class SchemaJsonFileType:
# schema.json should be normalised on read and write, by dropping any optional fields that are None.
def decode_from_bytes(self, data):
Expand Down Expand Up @@ -156,6 +181,9 @@ def match_group(self, meta_item_path, match_group):
# A longer description about the dataset's contents:
DESCRIPTION = MetaItemDefinition("description", MetaItemFileType.TEXT)

# A list of tags attached to the dataset - each tag is free-form text.
# Stored as "tags.json"; TagsJsonFileType enforces that the value is a JSON list of strings.
TAGS_JSON = MetaItemDefinition("tags.json", TagsJsonFileType.INSTANCE)

# JSON representation of the dataset's schema. See kart/tabular/schema.py, datasets_v3.rst
SCHEMA_JSON = MetaItemDefinition("schema.json", SchemaJsonFileType.INSTANCE)

Expand Down
2 changes: 2 additions & 0 deletions kart/point_cloud/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class PointCloudV1(BaseDataset):

TITLE = meta_items.TITLE
DESCRIPTION = meta_items.DESCRIPTION
TAGS_JSON = meta_items.TAGS_JSON
METADATA_XML = meta_items.METADATA_XML

# Which tile format(s) this dataset requires / allows.
Expand All @@ -63,6 +64,7 @@ class PointCloudV1(BaseDataset):
META_ITEMS = (
TITLE,
DESCRIPTION,
TAGS_JSON,
METADATA_XML,
FORMAT_JSON,
SCHEMA_JSON,
Expand Down
2 changes: 2 additions & 0 deletions kart/tabular/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class TableV3(RichTableDataset):
# === Visible meta-items ===
TITLE = meta_items.TITLE
DESCRIPTION = meta_items.DESCRIPTION
TAGS_JSON = meta_items.TAGS_JSON
METADATA_XML = meta_items.METADATA_XML
SCHEMA_JSON = meta_items.SCHEMA_JSON
CRS_DEFINITIONS = meta_items.CRS_DEFINITIONS
Expand All @@ -92,6 +93,7 @@ class TableV3(RichTableDataset):
META_ITEMS = (
TITLE,
DESCRIPTION,
TAGS_JSON,
METADATA_XML,
SCHEMA_JSON,
CRS_DEFINITIONS,
Expand Down
24 changes: 24 additions & 0 deletions kart/tabular/working_copy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
NotYetImplemented,
)
from kart.key_filters import DatasetKeyFilter, FeatureKeyFilter, RepoKeyFilter
from kart import meta_items
from kart.promisor_utils import LibgitSubcode
from kart.sqlalchemy.upsert import Upsert as upsert
from kart.tabular.table_dataset import TableDataset
Expand Down Expand Up @@ -42,6 +43,14 @@ class TableWorkingCopy(WorkingCopyPart):
self.kart_tables - sqlalchemy Table definitions for kart_state and kart_track tables.
"""

# Meta-items this working-copy type can round-trip. Entries may be
# MetaItemDefinition objects or plain meta-item name strings (both are
# accepted by _is_supported_meta_item). Subclasses should override if they
# can support more meta-items, eg description or metadata.xml.
SUPPORTED_META_ITEMS = (
    meta_items.TITLE,
    meta_items.SCHEMA_JSON,
    meta_items.CRS_DEFINITIONS
    # Not description, not metadata.xml, except where overridden by a subclass.
)

@property
def WORKING_COPY_TYPE_NAME(self):
"""Human readable name of this type of working copy, eg "PostGIS"."""
Expand Down Expand Up @@ -505,6 +514,16 @@ def _remove_hidden_numeric_diffs(
if old_col_dict.get(name) is None and new_col_dict.get(name) == default_val:
new_col_dict[name] = None

def _is_supported_meta_item(self, meta_item_name):
if meta_item_name in self.SUPPORTED_META_ITEMS:
return True
for definition in self.SUPPORTED_META_ITEMS:
if not isinstance(definition, meta_items.MetaItemDefinition):
continue
if definition.matches(meta_item_name):
return True
return False

def _remove_hidden_meta_diffs(self, dataset, ds_meta_items, wc_meta_items):
"""
Remove any meta diffs that can't or shouldn't be committed, and so shouldn't be shown to the user.
Expand All @@ -516,6 +535,11 @@ def _remove_hidden_meta_diffs(self, dataset, ds_meta_items, wc_meta_items):
def _safe_del(dict_, key):
dict_.pop(key, None)

ds_meta_keys = list(ds_meta_items.keys())
for key in ds_meta_keys:
if not self._is_supported_meta_item(key):
_safe_del(ds_meta_items, key)

# A dataset should have at most ONE of "metadata.xml" or "metadata/dataset.json".
# The XML file is newer and supercedes the JSON file.
# The GPKG adapter generates both, so we delete one so as to match the dataset.
Expand Down
12 changes: 11 additions & 1 deletion kart/tabular/working_copy/gpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import sqlalchemy as sa
from kart import crs_util
from kart.exceptions import InvalidOperation
from kart import meta_items
from kart.sqlalchemy import text_with_inlined_params
from kart.sqlalchemy.adapter.gpkg import KartAdapter_GPKG
from kart.schema import Schema
Expand All @@ -34,6 +35,15 @@ class WorkingCopy_GPKG(TableWorkingCopy):

WORKING_COPY_TYPE_NAME = "GPKG"

# GPKG can store all of the common meta-items, plus the legacy
# "metadata/dataset.json" item (the newer metadata.xml supersedes it,
# and a dataset should have at most one of the two).
SUPPORTED_META_ITEMS = (
    meta_items.TITLE,
    meta_items.DESCRIPTION,
    meta_items.SCHEMA_JSON,
    meta_items.CRS_DEFINITIONS,
    meta_items.METADATA_XML,
    "metadata/dataset.json",
)

def __init__(self, repo, location):
self.repo = repo
self.path = self.location = location
Expand Down Expand Up @@ -93,7 +103,7 @@ def normalise_location(cls, wc_location, repo):

@property
def full_path(self):
    """Return the resolved, absolute filesystem path of this working copy."""
    wc_path = self.repo.workdir_path / self.path
    return wc_path.resolve()

@property
Expand Down
11 changes: 0 additions & 11 deletions kart/tabular/working_copy/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,20 +228,9 @@ def try_align_schema_col(cls, old_col_dict, new_col_dict):

return new_type == old_type

_UNSUPPORTED_META_ITEMS = (
"description",
"metadata/dataset.json",
"metadata.xml",
)

def _remove_hidden_meta_diffs(self, dataset, ds_meta_items, wc_meta_items):
super()._remove_hidden_meta_diffs(dataset, ds_meta_items, wc_meta_items)

# Nowhere to put these in SQL Server WC
for key in self._UNSUPPORTED_META_ITEMS:
if key in ds_meta_items:
del ds_meta_items[key]

for key in ds_meta_items.keys() & wc_meta_items.keys():
if not key.startswith("crs/"):
continue
Expand Down
11 changes: 0 additions & 11 deletions kart/tabular/working_copy/postgis.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,6 @@ def _suspend_triggers(self, sess, dataset):
"""
)

# Postgis has nowhere obvious to put this metadata.
_UNSUPPORTED_META_ITEMS = ("description", "metadata/dataset.json", "metadata.xml")

# PostGIS approximates an int8 as an int16 - see super()._remove_hidden_meta_diffs
@classmethod
def try_align_schema_col(cls, old_col_dict, new_col_dict):
Expand All @@ -275,14 +272,6 @@ def try_align_schema_col(cls, old_col_dict, new_col_dict):

return new_type == old_type

def _remove_hidden_meta_diffs(self, dataset, ds_meta_items, wc_meta_items):
super()._remove_hidden_meta_diffs(dataset, ds_meta_items, wc_meta_items)

# Nowhere to put these in postgis WC
for key in self._UNSUPPORTED_META_ITEMS:
if key in ds_meta_items:
del ds_meta_items[key]

def _is_builtin_crs(self, crs):
auth_name, auth_code = crs_util.parse_authority(crs)
return auth_name in ("EPSG", "ESRI") or auth_code == "900913" # GOOGLE
Expand Down
11 changes: 0 additions & 11 deletions kart/tabular/working_copy/sqlserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,20 +263,9 @@ def try_align_schema_col(cls, old_col_dict, new_col_dict):

return new_type == old_type

_UNSUPPORTED_META_ITEMS = (
"description",
"metadata/dataset.json",
"metadata.xml",
)

def _remove_hidden_meta_diffs(self, dataset, ds_meta_items, wc_meta_items):
super()._remove_hidden_meta_diffs(dataset, ds_meta_items, wc_meta_items)

# Nowhere to put these in SQL Server WC
for key in self._UNSUPPORTED_META_ITEMS:
if key in ds_meta_items:
del ds_meta_items[key]

# Nowhere to put custom CRS in SQL Server, so remove custom CRS diffs.
# The working copy doesn't know the true authority name, so refers to them all as CUSTOM.
# Their original authority name could be anything.
Expand Down

0 comments on commit 71e55e1

Please sign in to comment.