This repository has been archived by the owner on Aug 18, 2022. It is now read-only.

Merge branch 'feature/chunked-read-write'
tmontaigu committed Aug 29, 2020
2 parents 33f6d40 + ac92a5f commit cf1b84c
Showing 35 changed files with 1,686 additions and 686 deletions.
83 changes: 83 additions & 0 deletions buffered.py
@@ -0,0 +1,83 @@
import argparse
import io
from pathlib import Path

import numpy as np

import pylas
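
# Round-trip check: for every .las/.laz file found under `path`, read the file,
# re-write it (optionally compressed) and verify that every point dimension
# survives unchanged.  --ilaz / --olaz name the pylas.LazBackend member to use
# for reading / writing (e.g. LazrsParallel, Lazrs, Laszip).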


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("path")
    parser.add_argument("--ilaz", default=None)
    parser.add_argument("--olaz", default=None)

    args = parser.parse_args()

    if args.olaz is not None:
        olaz_backend = (getattr(pylas.LazBackend, args.olaz),)
        do_compress = True
    else:
        olaz_backend = None
        do_compress = False

    if args.ilaz is not None:
        ilaz_backend = (getattr(pylas.LazBackend, args.ilaz),)
        all_files = Path(args.path).rglob("*.la[sz]")
    else:
        ilaz_backend = None
        all_files = Path(args.path).rglob("*.las")

    for file_path in all_files:
        print(f"checking {file_path}")
        # with io.BytesIO() as output:
        #     # with open('lol.laz', mode="w+b") as output:
        #     with pylas.open(str(file_path), laz_backends=ilaz_backend) as las_file:
        #         with pylas.open(output,
        #                         mode='w',
        #                         header=las_file.header,
        #                         do_compress=do_compress,
        #                         closefd=False,
        #                         laz_backends=olaz_backend) as las_out:
        #             las_out.vlrs = las_file.vlrs
        #             for points in las_file.chunk_iterator(1_216_418):
        #                 las_out.write(points)
        #                 # break
        #
        #     output.seek(0, io.SEEK_END)
        #     print(f"output is {output.tell()}")
        #     output.seek(0, io.SEEK_SET)
        #
        #     with open("dump.laz", mode="wb") as dumpf:
        #         dumpf.write(output.getbuffer())
        #
        #     original_las = pylas.read(str(file_path), laz_backends=ilaz_backend)
        #     written_las = pylas.read(output, laz_backends=ilaz_backend)
        #
        #     assert original_las.points.dtype == written_las.points.dtype
        #     for dim_name in original_las.points.dtype.names:
        #         assert np.allclose(original_las.points[dim_name],
        #                            written_las.points[dim_name]), f"{dim_name} dimensions are not equal"


        # Round-trip: read the file, re-write it compressed into an in-memory
        # buffer, then read it back and compare every point dimension.
        original_las = pylas.read(str(file_path), laz_backends=ilaz_backend)
        with io.BytesIO() as output:
            # with open('lol.laz', mode="w+b") as output:
            original_las.write(output, do_compress=True, laz_backend=olaz_backend)

            print(output.tell())
            print(output.seek(0, io.SEEK_SET))

            original_las = pylas.read(str(file_path), laz_backends=ilaz_backend)
            written_las = pylas.read(output, laz_backends=ilaz_backend)

            assert original_las.points.dtype == written_las.points.dtype
            for dim_name in original_las.points.dtype.names:
                assert np.allclose(original_las.points[dim_name],
                                   written_las.points[dim_name]), f"{dim_name} dimensions are not equal"

        break


if __name__ == '__main__':
    main()
46 changes: 40 additions & 6 deletions docs/basic.rst
@@ -5,27 +5,47 @@ Basic Manipulation

Opening & Reading
=================
You have two ways to read LAS files with pylas.

The easiest one is using the :func:`pylas.read` function.
This function will read everything in the file (header, VLRs, point records, ...) and return an object
that you can use to access the data.

.. code:: python

    import pylas

    las = pylas.read('somefile.las')
    print(np.unique(las.classification))
pylas can also :func:`pylas.open` files, reading just the header and VLRs but not the points; this is useful
if you need metadata information that is contained in the header.

.. code:: python

    import s3fs
    import pylas

    fs = s3fs.S3FileSystem()
    with fs.open('my-bucket/some_file.las', 'rb') as f:
        # read only the header to decide whether to load the whole file
        with pylas.open(f, closefd=False) as las_file:
            point_count = las_file.header.point_count
        f.seek(0)
        if point_count < 100_000_000:
            las = pylas.read(f)
Sometimes files are too big to be read entirely and fit into your RAM.
The object returned by the :func:`pylas.open` function, a :class:`pylas.lasreader.LasReader`,
can also be used to read points chunk by chunk, which allows you to do some
processing on large files (splitting, filtering, etc.).

.. code:: python

    import pylas

    with pylas.open("some_big_file.laz") as f:
        for points in f.chunk_iterator(1_000_000):
            do_something_with(points)
Converting
@@ -50,6 +70,20 @@ To be able to write a las file you will need a :class:`pylas.lasdatas.base.LasBase`
You obtain this type of object by using one of the functions above;
use its method :meth:`pylas.lasdatas.base.LasBase.write` to write to a file or a stream.
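
For example, a minimal sketch (``somefile.las`` is a placeholder name; ``do_compress``
is the same parameter the ``buffered.py`` script of this commit passes to ``write``):

.. code:: python

    import pylas

    las = pylas.read('somefile.las')
    las.write('somefile.laz', do_compress=True)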


Similarly to :class:`pylas.lasreader.LasReader`, there is a way to write a file
chunk by chunk.


.. code:: python

    import pylas

    with pylas.open("some_big_file.laz") as f:
        with pylas.open("grounds.laz", mode="w", header=f.header) as writer:
            for points in f.chunk_iterator(1_234_567):
                writer.write_points(points[points.classification == 2])
.. _accessing_header:
Accessing the file header
10 changes: 7 additions & 3 deletions pylas/__init__.py
@@ -1,16 +1,20 @@
__version__ = "0.4.2"

import logging

from . import errors, vlrs
from .evlrs import EVLR
from .headers import HeaderFactory
from .laswriter import LasWriter
from .lib import LazBackend
from .lib import convert, create_from_header
from .lib import create_las as create
from .lib import merge_las as merge
from .lib import mmap_las as mmap
from .lib import open_las as open
from .lib import read_las as read
from .point import PointFormat
from .point.dims import supported_point_formats, supported_versions
from .point.format import lost_dimensions

logging.getLogger(__name__).addHandler(logging.NullHandler())
144 changes: 62 additions & 82 deletions pylas/compression.py
@@ -4,9 +4,9 @@
There are also functions to use Laszip (meant to be used as a fallback)
"""
import enum
import os
import subprocess
from enum import Enum, auto
from typing import Tuple

import numpy as np

@@ -16,12 +16,10 @@

try:
    import lazperf
except ModuleNotFoundError:
    HAS_LAZPERF = False
else:
    HAS_LAZPERF = True


def raise_if_no_lazperf():
@@ -33,6 +31,51 @@ def raise_if_no_lazperf():
        )


class LazBackend(enum.Enum):
    """Supported backends for reading and writing LAS/LAZ"""

    # type_hint = Union[LazBackend, Iterable[LazBackend]]

    LazrsParallel = 0
    Lazrs = 1
    Laszip = 2  # laszip executable, used through a Popen

    def is_available(self) -> bool:
        """Returns true if the backend is available"""
        if self == LazBackend.Lazrs or self == LazBackend.LazrsParallel:
            try:
                import lazrs
            except ModuleNotFoundError:
                return False
            else:
                return True
        elif self == LazBackend.Laszip:
            try:
                find_laszip_executable()
            except FileNotFoundError:
                return False
            else:
                return True
        else:
            return False

    @staticmethod
    def detect_available() -> Tuple["LazBackend", ...]:
        """Returns a tuple containing the available backends in the current
        python environment
        """
        available_backends = []

        if LazBackend.LazrsParallel.is_available():
            available_backends.append(LazBackend.LazrsParallel)
            available_backends.append(LazBackend.Lazrs)

        if LazBackend.Laszip.is_available():
            available_backends.append(LazBackend.Laszip)

        return tuple(available_backends)
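
# Example usage (editor's sketch, not part of the committed file): detect the
# available backends and hand them to `pylas.open`, mirroring what the
# `buffered.py` script in this commit does with its --ilaz/--olaz options.
#
#   import pylas
#
#   backends = pylas.LazBackend.detect_available()
#   if not backends:
#       raise RuntimeError("No LAZ backend found; install lazrs or laszip")
#   with pylas.open("some_file.laz", laz_backends=backends) as reader:
#       for points in reader.chunk_iterator(1_000_000):
#           ...  # process each chunk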


def is_point_format_compressed(point_format_id):
    compression_bit_7 = (point_format_id & 0x80) >> 7
    compression_bit_6 = (point_format_id & 0x40) >> 6
@@ -42,14 +85,16 @@


def compressed_id_to_uncompressed(point_format_id):
    return point_format_id & 0x3F


def uncompressed_id_to_compressed(point_format_id):
    return (2 ** 7) | point_format_id
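
# Worked example (editor's note): for point format 3,
# uncompressed_id_to_compressed(3) == (2 ** 7) | 3 == 0b10000011 == 131,
# and compressed_id_to_uncompressed(131) == 131 & 0x3F == 3;
# bit 7 is the compression flag that is_point_format_compressed tests.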


def lazrs_decompress_buffer(
    compressed_buffer, point_size, point_count, laszip_vlr, parallel=True
):
    try:
        import lazrs
    except Exception as e:
@@ -61,7 +106,9 @@ def lazrs_decompress_buffer(compressed_buffer, point_size, point_count, laszip_v

        point_decompressed = np.zeros(point_count * point_size, np.uint8)

        lazrs.decompress_points(
            point_compressed, vlr_data, point_decompressed, parallel
        )
    except lazrs.LazrsError as e:
        raise LazError("lazrs error: {}".format(e)) from e
    else:
@@ -76,12 +123,11 @@ def lazrs_compress_points(points_data, parallel=True):

    try:
        vlr = lazrs.LazVlr.new_for_compression(
            points_data.point_format.id, points_data.point_format.num_extra_bytes
        )

        compressed_data = lazrs.compress_points(
            vlr, np.frombuffer(points_data.array, np.uint8), parallel
        )
    except lazrs.LazrsError as e:
        raise LazError("lazrs error: {}".format(e)) from e
@@ -96,16 +142,15 @@ def lazperf_decompress_buffer(compressed_buffer, point_size, point_count, laszip
        point_compressed = np.frombuffer(compressed_buffer, dtype=np.uint8)

        vlr_data = np.frombuffer(laszip_vlr.record_data, dtype=np.uint8)
        decompressor = lazperf.VLRDecompressor(point_compressed, point_size, vlr_data)

        point_uncompressed = decompressor.decompress_points(point_count)

        return point_uncompressed
    except RuntimeError as e:
        raise LazError("lazperf error: {}".format(e)) from e


def lazperf_create_laz_vlr(points_record):
    raise_if_no_lazperf()
    try:
@@ -161,68 +206,3 @@ def find_laszip_executable():

    else:
        raise FileNotFoundError("Could not find laszip executable")


class LasZipProcess:
    class Actions(Enum):
        Compress = auto()
        Decompress = auto()

    def __init__(self, action, stdin=subprocess.PIPE, stdout=subprocess.PIPE):
        """Creates a Popen to the laszip executable.

        This tries to be a thin wrapper over
        https://docs.python.org/fr/3/library/subprocess.html#subprocess.Popen

        Valid inputs for `stdin` and `stdout` are file objects supporting
        the fileno() method, for example files opened with `open`.

        The usage is a bit tricky; see the sketch after this class.
        """
        laszip_binary = find_laszip_executable()

        if action == LasZipProcess.Actions.Decompress:
            out_t = "-olas"
        elif action == LasZipProcess.Actions.Compress:
            out_t = "-olaz"
        else:
            raise ValueError("Invalid Action")

        self.prc = subprocess.Popen(
            [laszip_binary, "-stdin", out_t, "-stdout"],
            stdin=stdin,
            stdout=stdout,
            stderr=subprocess.PIPE,
        )

    @property
    def stdin(self):
        return self.prc.stdin

    @property
    def stdout(self):
        return self.prc.stdout

    def wait(self):
        return self.prc.wait()

    def communicate(self):
        stdout_data, stderr_data = self.prc.communicate()
        self.raise_if_bad_err_code(stderr_data.decode())
        return stdout_data

    def raise_if_bad_err_code(self, error_msg=None):
        if error_msg is None:
            error_msg = self.prc.stderr.read().decode()
        if self.prc.returncode != 0:
            raise RuntimeError(
                "Laszip failed with error code {}\n\t{}".format(
                    self.prc.returncode, "\n\t".join(error_msg.splitlines())
                )
            )

    def wait_until_finished(self):
        self.stdin.close()
        self.prc.wait()
        self.raise_if_bad_err_code(self.prc.stderr.read().decode())
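
# Example usage (editor's sketch, not part of this commit; buffers the whole
# file in memory, so only suitable for reasonably small files):
#
#   with open("points.las", "rb") as fin:
#       process = LasZipProcess(LasZipProcess.Actions.Compress)
#       process.stdin.write(fin.read())
#       laz_bytes = process.communicate()  # also checks laszip's exit status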
