
Commit

Merge pull request #587 from koordinates/pc-import-metadata
Import point-cloud metadata - schema and CRS
olsen232 committed Mar 17, 2022
2 parents 26c4d76 + 7be3fdc commit 84ac2f2
Showing 3 changed files with 125 additions and 24 deletions.
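The substance of the change is in kart/point_cloud/import_.py below: the per-tile PDAL pipeline gains a filters.info stage, and the dimension schema and CRS WKT it reports are written into the dataset's meta/ tree. As a rough standalone sketch of that extraction (illustrative only - read_tile_metadata is a hypothetical helper, not part of this commit, and it assumes the same pdal Python bindings the command uses):

    import json
    import pdal

    def read_tile_metadata(source):
        # Read only the LAS header plus PDAL's dimension schema - no point data.
        config = [
            {"type": "readers.las", "filename": source, "count": 0},
            {"type": "filters.info"},
        ]
        pipeline = pdal.Pipeline(json.dumps(config))
        pipeline.execute()
        metadata = json.loads(pipeline.metadata)["metadata"]
        crs_wkt = metadata["readers.las"]["srs"]["wkt"]
        schema = metadata["filters.info"]["schema"]
        return crs_wkt, schema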
2 changes: 1 addition & 1 deletion kart/crs_util.py
@@ -166,7 +166,7 @@ def _generate_identifier_int(crs):
     # it will get a new auto-generated ID - but the user can name it to avoid this behaviour.
     if isinstance(crs, str):
         normalised_wkt = normalise_wkt(crs)
-    elif isinstance(crs, SpatialReference):
+    elif isinstance(crs, osr.SpatialReference):
         normalised_wkt = normalise_wkt(crs.ExportToWkt())
     else:
         raise RuntimeError(f"Unrecognised CRS: {crs}")
99 changes: 76 additions & 23 deletions kart/point_cloud/import_.py
@@ -9,7 +9,7 @@
 import click
 from osgeo import osr
 
-from kart.crs_util import make_crs
+from kart.crs_util import make_crs, get_identifier_str, normalise_wkt
 from kart.dataset_util import validate_dataset_paths
 from kart.exceptions import (
     InvalidOperation,
@@ -24,7 +24,7 @@
     write_blob_to_stream,
     write_blobs_to_stream,
 )
-from kart.serialise_util import hexhash
+from kart.serialise_util import hexhash, json_pack, ensure_bytes
 from kart.output_util import format_wkt_for_output
 from kart.repo_version import (
     SUPPORTED_REPO_VERSIONS,
@@ -34,11 +34,17 @@
 
 @click.command("point-cloud-import", hidden=True)
 @click.pass_context
+@click.option(
+    "--convert-to-copc/--no-convert-to-copc",
+    is_flag=True,
+    default=True,
+    help="Convert non-COPC LAS or LAZ files to COPC LAZ files",
+)
 @click.option(
     "--dataset-path", "ds_path", help="The dataset's path once imported", required=True
 )
 @click.argument("sources", metavar="SOURCES", nargs=-1, required=True)
-def point_cloud_import(ctx, ds_path, sources):
+def point_cloud_import(ctx, convert_to_copc, ds_path, sources):
     """
     Experimental command for importing point cloud datasets. Work-in-progress.
     Will eventually be merged with the main `import` command.
@@ -61,8 +67,12 @@ def point_cloud_import(ctx, ds_path, sources):
     version_set = ListBasedSet()
     copc_version_set = ListBasedSet()
     pdrf_set = ListBasedSet()
+    pdr_length_set = ListBasedSet()
+    schema_set = ListBasedSet()
     crs_set = ListBasedSet()
     transform = None
+    schema = None
+    crs_name = None
 
     per_source_info = {}
 
@@ -75,6 +85,9 @@
                 "count": 0,  # Don't read any individual points.
             }
         ]
+        if schema is None:
+            config.append({"type": "filters.info"})
+
         pipeline = pdal.Pipeline(json.dumps(config))
         try:
             pipeline.execute()
@@ -83,7 +96,8 @@
                 f"Error reading {source}", exit_code=INVALID_FILE_FORMAT
             )
 
-        info = json.loads(pipeline.metadata)["metadata"]["readers.las"]
+        metadata = json.loads(pipeline.metadata)["metadata"]
+        info = metadata["readers.las"]
 
         compressed_set.add(info["compressed"])
         if len(compressed_set) > 1:
@@ -102,6 +116,10 @@
         if len(pdrf_set) > 1:
             raise _non_homogenous_error("Point Data Record Format", pdrf_set)
 
+        pdr_length_set.add(info["point_length"])
+        if len(pdr_length_set) > 1:
+            raise _non_homogenous_error("Point Data Record Length", pdr_length_set)
+
         crs_set.add(info["srs"]["wkt"])
         if len(crs_set) > 1:
             raise _non_homogenous_error(
@@ -112,7 +130,7 @@
             )
 
         if transform is None:
-            src_crs = make_crs(crs_set[0])
+            src_crs = make_crs(crs_set.only())
             target_crs = make_crs("EPSG:4326")
             transform = osr.CoordinateTransformation(src_crs, target_crs)
 
@@ -124,8 +142,39 @@
             "crs84_envelope": crs84_envelope,
         }
 
+        if schema is None:
+            crs_name = get_identifier_str(crs_set.only())
+            schema = metadata["filters.info"]["schema"]
+            schema["CRS"] = crs_name
+
     click.echo()
 
+    version = version_set.only()
+    copc_version = copc_version_set.only()
+    is_laz = compressed_set.only() is True
+    is_copc = is_laz and copc_version != NOT_COPC
+
+    if is_copc:
+        # Keep native format.
+        import_func = _copy_tile_to_lfs_blob
+        kart_format = f"pc:v1/copc-{copc_version}.0"
+    elif is_laz:
+        # Optionally Convert to COPC 1.0 if requested
+        import_func = (
+            _convert_tile_to_copc_lfs_blob
+            if convert_to_copc
+            else _copy_tile_to_lfs_blob
+        )
+        kart_format = "pc:v1/copc-1.0" if convert_to_copc else f"pc:v1/laz-{version}"
+    else:  # LAS
+        if not convert_to_copc:
+            raise InvalidOperation(
+                "LAS datasets are not supported - dataset must be converted to LAZ / COPC",
+                exit_code=INVALID_FILE_FORMAT,
+            )
+        import_func = _convert_tile_to_copc_lfs_blob
+        kart_format = "pc:v1/copc-1.0"
+
     # Set up LFS hooks.
     # TODO: This could eventually be moved to `kart init`.
     if not (repo.gitdir_path / "hooks" / "pre-push").is_file():
@@ -140,20 +189,12 @@
     header = generate_header(
         repo,
         None,
-        f"Importing {len(sources)} point-cloud tiles as {ds_path}",
+        f"Importing {len(sources)} LAZ tiles as {ds_path}",
         repo.head_branch,
         repo.head_commit,
     )
 
-    # TODO - revisit this if another COPC version is released.
-    if compressed_set[0] is True and copc_version_set[0] != NOT_COPC:
-        # Keep native format.
-        import_func = _copy_tile_to_copc_lfs_blob
-        copc_version = copc_version_set[0]
-    else:
-        # Convert to COPC 1.0
-        import_func = _convert_tile_to_copc_lfs_blob
-        copc_version = 1
+    ds_inner_path = f"{ds_path}/.point-cloud-dataset.v1"
 
     lfs_objects_path = repo.gitdir_path / "lfs" / "objects"
     lfs_tmp_import_path = lfs_objects_path / "import"
@@ -177,22 +218,29 @@
             # TODO - is this the right prefix and name?
             tilename = os.path.splitext(os.path.basename(source))[0] + ".copc.laz"
             tile_prefix = hexhash(tilename)[0:2]
-            blob_path = (
-                f"{ds_path}/.point-cloud-dataset.v1/tiles/{tile_prefix}/{tilename}"
-            )
+            blob_path = f"{ds_inner_path}/tiles/{tile_prefix}/{tilename}"
             info = per_source_info[source]
             pointer_dict = {
                 "version": "https://git-lfs.github.com/spec/v1",
                 # TODO - available.<URL-IDX> <URL>
                 "kart.extent.crs84": _format_array(info["crs84_envelope"]),
                 "kart.extent.native": _format_array(info["native_envelope"]),
-                "kart.format": f"pc:v1/copc-{copc_version}.0",
+                "kart.format": kart_format,
                 "kart.pc.count": info["count"],
                 "oid": f"sha256:{oid}",
                 "size": size,
             }
             write_pointer_file_to_stream(proc.stdin, blob_path, pointer_dict)
 
+        write_blob_to_stream(
+            proc.stdin, f"{ds_inner_path}/meta/schema.json", json_pack(schema)
+        )
+        write_blob_to_stream(
+            proc.stdin,
+            f"{ds_inner_path}/meta/crs/{crs_name}.wkt",
+            ensure_bytes(normalise_wkt(crs_set.only())),
+        )
+
     click.echo()
 
 
@@ -310,7 +358,7 @@ def _convert_tile_to_copc_lfs_blob(source, dest):
     return sha256.hexdigest(), size
 
 
-def _copy_tile_to_copc_lfs_blob(source, dest):
+def _copy_tile_to_lfs_blob(source, dest):
     """Copies a file from source to dest and returns the SHA256 and length of the file."""
     sha256 = hashlib.sha256()
     size = Path(source).stat().st_size
@@ -338,6 +386,14 @@ def add(self, element):
         if element not in self.list:
             self.list.append(element)
 
+    def only(self):
+        """Return the only element in this collection, or raise a LookupError."""
+        if len(self.list) != 1:
+            raise LookupError(
+                f"Can't return only element: set contains {len(self.list)} elements"
+            )
+        return self.list[0]
+
     def __contains__(self, element):
         return element in self.list
 
@@ -346,6 +402,3 @@ def __len__(self):
 
     def __iter__(self):
         return iter(self.list)
-
-    def __getitem__(self, key):
-        return self.list[key]
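ListBasedSet also loses __getitem__ in favour of the new only() accessor, which the import uses to read the single agreed-upon value out of each set once the homogeneity checks have passed. A small usage sketch (illustrative only, assuming the class as defined above):

    crs_set = ListBasedSet()
    crs_set.add("EPSG:2193")
    crs_set.add("EPSG:2193")  # duplicates are ignored
    assert crs_set.only() == "EPSG:2193"

    crs_set.add("EPSG:4326")
    # crs_set.only() would now raise LookupError, since the set holds two elements.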
48 changes: 48 additions & 0 deletions tests/test_point_cloud.py
@@ -1,4 +1,5 @@
 from glob import glob
+import json
 import re
 import subprocess
 import pytest
@@ -52,6 +53,28 @@ def test_import_single_las(
             )
             assert r.exit_code == 0, r.stderr
 
+            schema_json = (
+                repo.head_tree / "autzen/.point-cloud-dataset.v1/meta/schema.json"
+            )
+            assert json.loads(schema_json.data) == {
+                "dimensions": [
+                    {"name": "X", "size": 8, "type": "floating"},
+                    {"name": "Y", "size": 8, "type": "floating"},
+                    {"name": "Z", "size": 8, "type": "floating"},
+                    {"name": "Intensity", "size": 2, "type": "unsigned"},
+                    {"name": "ReturnNumber", "size": 1, "type": "unsigned"},
+                    {"name": "NumberOfReturns", "size": 1, "type": "unsigned"},
+                    {"name": "ScanDirectionFlag", "size": 1, "type": "unsigned"},
+                    {"name": "EdgeOfFlightLine", "size": 1, "type": "unsigned"},
+                    {"name": "Classification", "size": 1, "type": "unsigned"},
+                    {"name": "ScanAngleRank", "size": 4, "type": "floating"},
+                    {"name": "UserData", "size": 1, "type": "unsigned"},
+                    {"name": "PointSourceId", "size": 2, "type": "unsigned"},
+                    {"name": "GpsTime", "size": 8, "type": "floating"},
+                ],
+                "CRS": "EPSG:2994",
+            }
+
             r = cli_runner.invoke(["remote", "add", "origin", DUMMY_REPO])
             assert r.exit_code == 0, r.stderr
             repo.config[f"lfs.{DUMMY_REPO}/info/lfs.locksverify"] = False
@@ -85,6 +108,31 @@ def test_import_several_las(
             )
             assert r.exit_code == 0, r.stderr
 
+            schema_json = (
+                repo.head_tree / "auckland/.point-cloud-dataset.v1/meta/schema.json"
+            )
+            assert json.loads(schema_json.data) == {
+                "dimensions": [
+                    {"name": "X", "size": 8, "type": "floating"},
+                    {"name": "Y", "size": 8, "type": "floating"},
+                    {"name": "Z", "size": 8, "type": "floating"},
+                    {"name": "Intensity", "size": 2, "type": "unsigned"},
+                    {"name": "ReturnNumber", "size": 1, "type": "unsigned"},
+                    {"name": "NumberOfReturns", "size": 1, "type": "unsigned"},
+                    {"name": "ScanDirectionFlag", "size": 1, "type": "unsigned"},
+                    {"name": "EdgeOfFlightLine", "size": 1, "type": "unsigned"},
+                    {"name": "Classification", "size": 1, "type": "unsigned"},
+                    {"name": "ScanAngleRank", "size": 4, "type": "floating"},
+                    {"name": "UserData", "size": 1, "type": "unsigned"},
+                    {"name": "PointSourceId", "size": 2, "type": "unsigned"},
+                    {"name": "GpsTime", "size": 8, "type": "floating"},
+                    {"name": "Red", "size": 2, "type": "unsigned"},
+                    {"name": "Green", "size": 2, "type": "unsigned"},
+                    {"name": "Blue", "size": 2, "type": "unsigned"},
+                ],
+                "CRS": "EPSG:2193",
+            }
+
             r = cli_runner.invoke(["remote", "add", "origin", DUMMY_REPO])
             assert r.exit_code == 0, r.stderr
             repo.config[f"lfs.{DUMMY_REPO}/info/lfs.locksverify"] = False
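With the new --convert-to-copc/--no-convert-to-copc option, an import that keeps existing LAZ tiles in their native format might be invoked roughly like this (illustrative paths; the command itself is still hidden and marked experimental):

    kart point-cloud-import --dataset-path=auckland --no-convert-to-copc auckland/*.laz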
