Merge branch 'feature/append-mode'
tmontaigu committed Sep 3, 2020
2 parents cf1b84c + 7905534 commit 4944aab
Showing 12 changed files with 345 additions and 69 deletions.
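For orientation, a minimal usage sketch of the new append mode (assumptions: LasAppender is imported from pylas.lasappender, since this diff does not show a public entry point, and points_data on a read file is a compatible PointRecord):

import pylas
from pylas.lasappender import LasAppender

# Points to append; both files must share the same point format,
# otherwise append_points raises PylasError.
new_points = pylas.read("extra_points.las").points_data  # assumed attribute

# The destination must be a seekable, writable binary stream.
with open("existing.las", "rb+") as dest:
    with LasAppender(dest, closefd=False) as appender:
        appender.append_points(new_points)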
1 change: 1 addition & 0 deletions pylas/__init__.py
@@ -6,6 +6,7 @@
from .evlrs import EVLR
from .headers import HeaderFactory
from .laswriter import LasWriter
from .errors import PylasError
from .lib import LazBackend
from .lib import convert, create_from_header
from .lib import create_las as create
33 changes: 33 additions & 0 deletions pylas/headers/rawheader.py
@@ -8,6 +8,7 @@

from .. import compression, utils
from .. import errors
from ..point.record import PointRecord

logger = logging.getLogger(__name__)

@@ -255,6 +256,38 @@ def set_compressed(self, compressed: bool):
self._point_data_format_id
)

def update(self, points: PointRecord) -> None:
self.x_max = max(
self.x_max,
(points["X"].max() * self.x_scale) + self.x_offset,
)
self.y_max = max(
self.y_max,
(points["Y"].max() * self.y_scale) + self.y_offset,
)
self.z_max = max(
self.z_max,
(points["Z"].max() * self.z_scale) + self.z_offset,
)
self.x_min = min(
self.x_min,
(points["X"].min() * self.x_scale) + self.x_offset,
)
self.y_min = min(
self.y_min,
(points["Y"].min() * self.y_scale) + self.y_offset,
)
self.z_min = min(
self.z_min,
(points["Z"].min() * self.z_scale) + self.z_offset,
)

        for i, count in zip(*np.unique(points.return_number, return_counts=True)):
            if i > len(self.number_of_points_by_return):
                break  # np.unique sorts values, so the remaining ones are also out of range
            self.number_of_points_by_return[i - 1] += count
self.point_count += len(points)

def __repr__(self):
return "<LasHeader({})>".format(self.version)

189 changes: 189 additions & 0 deletions pylas/lasappender.py
@@ -0,0 +1,189 @@
import io
import math
from typing import Union, Iterable

from .compression import LazBackend
from .errors import PylasError
from .evlrs import EVLRList, RawEVLRList
from .lasreader import LasReader, get_extra_dims_info_tuple
from .laswriter import UncompressedPointWriter
from .point.format import PointFormat
from .point.record import PointRecord

try:
import lazrs
except ModuleNotFoundError:
pass


class LazrsAppender:
def __init__(self, dest, header, vlrs, parallel):
self.dest = dest
self.offset_to_point_data = header.offset_to_point_data
laszip_vlr = vlrs.pop(vlrs.index("LasZipVlr"))

self.dest.seek(header.offset_to_point_data, io.SEEK_SET)
decompressor = lazrs.LasZipDecompressor(self.dest, laszip_vlr.record_data)
vlr = decompressor.vlr()
number_of_complete_chunk = int(
math.floor(header.point_count / vlr.chunk_size())
)

self.dest.seek(header.offset_to_point_data, io.SEEK_SET)
chunk_table = lazrs.read_chunk_table(self.dest)
if chunk_table is None:
            # The file does not have a chunk table, so we cannot seek
            # to the last chunk; instead we decompress the complete
            # chunks (which is slower) and rebuild the chunk table

self.chunk_table = []
start_of_chunk = self.dest.tell()
point_buf = bytearray(vlr.chunk_size() * vlr.item_size())

for _ in range(number_of_complete_chunk):
decompressor.decompress_many(point_buf)
pos = self.dest.tell()
self.chunk_table.append(pos - start_of_chunk)
start_of_chunk = pos
else:
self.chunk_table = chunk_table[:-1]
idx_first_point_of_last_chunk = number_of_complete_chunk * vlr.chunk_size()
decompressor.seek(idx_first_point_of_last_chunk)

points_of_last_chunk = bytearray(
(header.point_count % vlr.chunk_size()) * vlr.item_size()
)
decompressor.decompress_many(points_of_last_chunk)

self.dest.seek(header.offset_to_point_data, io.SEEK_SET)
        if parallel:
            self.compressor = lazrs.ParLasZipCompressor(
                self.dest, vlr
            )  # This overwrites the old offset to the chunk table
        else:
            self.compressor = lazrs.LasZipCompressor(
                self.dest, vlr
            )  # This overwrites the old offset to the chunk table
self.dest.seek(sum(self.chunk_table), io.SEEK_CUR)
self.compressor.compress_many(points_of_last_chunk)
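A quick worked check of the chunk arithmetic above (numbers are illustrative): only complete chunks can be left untouched, so the partial last chunk is decompressed and re-encoded to let appended points continue it.

import math

chunk_size = 50_000    # from the LasZip VLR (illustrative value)
point_count = 120_000  # from the header (illustrative value)

number_of_complete_chunks = int(math.floor(point_count / chunk_size))
points_in_last_chunk = point_count % chunk_size
assert number_of_complete_chunks == 2
assert points_in_last_chunk == 20_000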

def write_points(self, points):
self.compressor.compress_many(points.memoryview())

def done(self):
        # The chunk table written by the compressor is at the correct
        # position, but it is incomplete: it is missing the entries for
        # the chunks written before the ones we appended
self.compressor.done()

# So we update it
self.dest.seek(self.offset_to_point_data, io.SEEK_SET)
offset_to_chunk_table = int.from_bytes(self.dest.read(8), "little", signed=True)
self.dest.seek(-8, io.SEEK_CUR)
chunk_table = self.chunk_table + lazrs.read_chunk_table(self.dest)
self.dest.seek(offset_to_chunk_table, io.SEEK_SET)
lazrs.write_chunk_table(self.dest, chunk_table)
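For reference, a small sketch (not part of the diff) of the layout assumption behind done(): LAZ point data begins with an 8-byte signed little-endian integer holding the absolute file offset of the chunk table, which is exactly what the int.from_bytes call above reads.

import io
import struct

def read_offset_to_chunk_table(dest, offset_to_point_data):
    # First 8 bytes of the point data: absolute offset of the chunk
    # table, stored as a signed little-endian 64-bit integer.
    dest.seek(offset_to_point_data, io.SEEK_SET)
    return struct.unpack("<q", dest.read(8))[0]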


class LasAppender:
def __init__(
self,
dest,
laz_backend: Union[LazBackend, Iterable[LazBackend]] = (
LazBackend.LazrsParallel,
LazBackend.Lazrs,
),
closefd: bool = True,
) -> None:
if not dest.seekable():
            raise TypeError("Expected 'dest' to be a seekable file object")
header, vlrs = LasReader._read_header_and_vlrs(dest, seekable=True)

self.dest = dest
self.header = header
self.vlrs = vlrs
self.point_format = PointFormat(
self.header.point_format_id,
get_extra_dims_info_tuple(self.header, self.vlrs),
)

if not header.are_points_compressed:
self.points_writer = UncompressedPointWriter(self.dest)
self.dest.seek(
(self.header.point_count * self.header.point_size) + self.header.offset_to_point_data,
io.SEEK_SET
)
else:
self.points_writer = self._create_laz_backend(laz_backend)

if header.version >= "1.4" and header.number_of_evlr > 0:
assert self.dest.tell() <= self.header.start_of_first_evlr, "The position is past the start of evlrs"
            pos = self.dest.tell()
            self.dest.seek(self.header.start_of_first_evlr, io.SEEK_SET)
            self.evlrs = EVLRList.read_from(self.dest, self.header.number_of_evlr)
            self.dest.seek(pos, io.SEEK_SET)
elif header.version >= "1.4":
self.evlrs = []

self.closefd = closefd

def append_points(self, points: PointRecord) -> None:
if points.point_format != self.point_format:
raise PylasError("Point formats do not match")

self.points_writer.write_points(points)
self.header.update(points)

def close(self) -> None:
self.points_writer.done()
self._write_evlrs()
self._write_updated_header()

if self.closefd:
self.dest.close()

def _write_evlrs(self):
if self.header.version >= "1.4" and len(self.evlrs) > 0:
self.header.number_of_evlr = len(self.evlrs)
self.header.start_of_first_evlr = self.dest.tell()
raw_evlrs = RawEVLRList.from_list(self.evlrs)
raw_evlrs.write_to(self.dest)

def _write_updated_header(self):
pos = self.dest.tell()
self.dest.seek(0, io.SEEK_SET)
self.header.write_to(self.dest)
self.dest.seek(pos, io.SEEK_SET)

def _create_laz_backend(
self,
laz_backend: Union[LazBackend, Iterable[LazBackend]] = (
LazBackend.LazrsParallel,
LazBackend.Lazrs,
),
) -> LazrsAppender:
try:
laz_backend = iter(laz_backend)
except TypeError:
laz_backend = (laz_backend,)

last_error = None
for backend in laz_backend:
if backend == LazBackend.Laszip:
raise PylasError("Laszip backend does not support appending")
elif backend == LazBackend.LazrsParallel:
return LazrsAppender(self.dest, self.header, self.vlrs, parallel=True)
elif backend == LazBackend.Lazrs:
return LazrsAppender(self.dest, self.header, self.vlrs, parallel=False)
else:
if last_error is not None:
raise PylasError(f"Could not initialize a laz backend: {last_error}")
else:
raise PylasError(f"No valid laz backend selected")

def __enter__(self):
return self

def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
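The backend can also be pinned explicitly instead of relying on the default fallback order (LazrsParallel, then Lazrs); a sketch reusing new_points from the first example, with existing.laz a hypothetical file:

from pylas import LazBackend
from pylas.lasappender import LasAppender

# Force the single-threaded lazrs backend.
with open("existing.laz", "rb+") as dest:
    with LasAppender(dest, laz_backend=LazBackend.Lazrs, closefd=False) as appender:
        appender.append_points(new_points)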
65 changes: 34 additions & 31 deletions pylas/lasreader.py
@@ -1,4 +1,5 @@
import abc
+import io
import logging
import os
import subprocess
@@ -13,7 +14,7 @@
from .point import record, PointFormat
from .point.dims import size_of_point_format_id
from .utils import ConveyorThread
-from .vlrs.known import LasZipVlr, ExtraBytesVlr
+from .vlrs.known import LasZipVlr
from .vlrs.vlrlist import VLRList

try:
@@ -24,16 +25,33 @@
logger = logging.getLogger(__name__)


+def get_extra_dims_info_tuple(header, vlrs) -> Optional[Tuple[Tuple[str, str], ...]]:
+    try:
+        extra_dims = vlrs.get("ExtraBytesVlr")[0].type_of_extra_dims()
+    except IndexError:
+        return None
+
+    point_size_without_extra_bytes = size_of_point_format_id(header.point_format_id)
+    if header.point_size == point_size_without_extra_bytes:
+        logger.warning(
+            "There is an ExtraBytesVlr but the header.point_size matches the "
+            "point size without extra bytes. The extra bytes vlr info will be ignored"
+        )
+        vlrs.extract("ExtraBytesVlr")
+        extra_dims = None
+    return extra_dims


class LasReader:
"""The reader class handles LAS and LAZ via one of the supported backend"""

def __init__(
-            self,
-            source: BinaryIO,
-            closefd: bool = True,
-            laz_backends: Union[
-                LazBackend, Iterable[LazBackend]
-            ] = LazBackend.detect_available(),
+        self,
+        source: BinaryIO,
+        closefd: bool = True,
+        laz_backends: Union[
+            LazBackend, Iterable[LazBackend]
+        ] = LazBackend.detect_available(),
):
self.closefd = closefd
self.laz_backends = laz_backends
@@ -54,7 +72,8 @@ def __init__(

self.points_read = 0
self.point_format = PointFormat(
-            self.header.point_format_id, extra_dims=self._get_extra_dims()
+            self.header.point_format_id,
+            extra_dims=get_extra_dims_info_tuple(self.header, self.vlrs),
)

def read_n_points(self, n: int) -> Optional[record.ScaleAwarePointRecord]:
@@ -83,7 +102,11 @@ def read(self):
points = record.PackedPointRecord.empty(self.point_format)

if self.header.version >= "1.4":
-            evlrs = self._read_evlrs(self.point_source.source)
+            if self.header.are_points_compressed and not self.point_source.source.seekable():
+                # We explicitly require a seekable stream because we have
+                # to seek past the chunk table of the LAZ file
+                raise errors.PylasError("source must be seekable to read evlrs from a LAZ file")
+            evlrs = self._read_evlrs(self.point_source.source, seekable=True)
las_data = las14.LasData(
header=self.header, vlrs=self.vlrs, points=points, evlrs=evlrs
)
@@ -137,32 +160,14 @@ def _create_laz_backend(self, source):
except errors.LazError as e:
logger.error(e)

-    def _get_extra_dims(self) -> Optional[Tuple[Tuple[str, str], ...]]:
-        try:
-            extra_dims = self.vlrs.get("ExtraBytesVlr")[0].type_of_extra_dims()
-        except IndexError:
-            return None
-
-        point_size_without_extra_bytes = size_of_point_format_id(
-            self.header.point_format_id
-        )
-        if self.header.point_size == point_size_without_extra_bytes:
-            logger.warning(
-                "There is an ExtraByteVlr but the header.point_size matches the "
-                "point size without extra bytes. The extra bytes vlr info will be ignored"
-            )
-            self.vlrs.extract("ExtraBytesVlr")
-            extra_dims = None
-        return extra_dims

@staticmethod
def _read_header_and_vlrs(source, seekable=True):
header = headers.HeaderFactory().read_from_stream(source)
vlrs = VLRList.read_from(source, num_to_read=header.number_of_vlr)
if seekable:
offset = header.offset_to_point_data - source.tell()
if offset >= 0:
-                source.read(offset)
+                source.seek(offset, io.SEEK_CUR)
else:
raise RuntimeError("Read past point data") # TODO
return header, vlrs
@@ -325,9 +330,7 @@ def __init__(self, source, laszip_vlr: LasZipVlr, parallel: bool) -> None:
source, laszip_vlr.record_data
)
else:
-            self.decompressor = lazrs.LasZipDecompressor(
-                laszip_vlr.record_data, self.source
-            )
+            self.decompressor = lazrs.LasZipDecompressor(source, laszip_vlr.record_data)

def read_n_points(self, n) -> bytes:
point_bytes = np.zeros(n * self.vlr.item_size(), np.uint8)
