This repository has been archived by the owner on Aug 18, 2022. It is now read-only.

Merge branch 'feature/chunked-read-write'
tmontaigu committed Aug 29, 2020
2 parents 33f6d40 + ac92a5f commit cf1b84c
Showing 35 changed files with 1,686 additions and 686 deletions.
83 changes: 83 additions & 0 deletions buffered.py
@@ -0,0 +1,83 @@
import argparse
import io
from pathlib import Path

import numpy as np

import pylas
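
# Round-trip check: for every .las/.laz file found under `path`, read the file,
# re-write it (optionally compressed) and verify that every point dimension
# survives unchanged.  --ilaz / --olaz name the pylas.LazBackend member to use
# for reading / writing (e.g. LazrsParallel, Lazrs, Laszip).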


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("path")
    parser.add_argument("--ilaz", default=None)
    parser.add_argument("--olaz", default=None)

    args = parser.parse_args()

    if args.olaz is not None:
        olaz_backend = (getattr(pylas.LazBackend, args.olaz),)
        do_compress = True
    else:
        olaz_backend = None
        do_compress = False

    if args.ilaz is not None:
        ilaz_backend = (getattr(pylas.LazBackend, args.ilaz),)
        all_files = Path(args.path).rglob("*.la[sz]")
    else:
        ilaz_backend = None
        all_files = Path(args.path).rglob("*.las")

    for file_path in all_files:
        print(f"checking {file_path}")
        # with io.BytesIO() as output:
        #     # with open('lol.laz', mode="w+b") as output:
        #     with pylas.open(str(file_path), laz_backends=ilaz_backend) as las_file:
        #         with pylas.open(output,
        #                         mode='w',
        #                         header=las_file.header,
        #                         do_compress=do_compress,
        #                         closefd=False,
        #                         laz_backends=olaz_backend) as las_out:
        #             las_out.vlrs = las_file.vlrs
        #             for points in las_file.chunk_iterator(1_216_418):
        #                 las_out.write(points)
        #                 # break
        #
        #     output.seek(0, io.SEEK_END)
        #     print(f"output is {output.tell()}")
        #     output.seek(0, io.SEEK_SET)
        #
        #     with open("dump.laz", mode="wb") as dumpf:
        #         dumpf.write(output.getbuffer())
        #
        #     original_las = pylas.read(str(file_path), laz_backends=ilaz_backend)
        #     written_las = pylas.read(output, laz_backends=ilaz_backend)
        #
        #     assert original_las.points.dtype == written_las.points.dtype
        #     for dim_name in original_las.points.dtype.names:
        #         assert np.allclose(original_las.points[dim_name],
        #                            written_las.points[dim_name]), f"{dim_name} dimensions are not equal"


        # Round-trip: read the file, re-write it compressed into an in-memory
        # buffer, then read it back and compare every point dimension.
        original_las = pylas.read(str(file_path), laz_backends=ilaz_backend)
        with io.BytesIO() as output:
            # with open('lol.laz', mode="w+b") as output:
            original_las.write(output, do_compress=True, laz_backend=olaz_backend)

            print(output.tell())
            print(output.seek(0, io.SEEK_SET))

            original_las = pylas.read(str(file_path), laz_backends=ilaz_backend)
            written_las = pylas.read(output, laz_backends=ilaz_backend)

            assert original_las.points.dtype == written_las.points.dtype
            for dim_name in original_las.points.dtype.names:
                assert np.allclose(original_las.points[dim_name],
                                   written_las.points[dim_name]), f"{dim_name} dimensions are not equal"

        break


if __name__ == '__main__':
    main()
46 changes: 40 additions & 6 deletions docs/basic.rst
@@ -5,27 +5,47 @@ Basic Manipulation

Opening & Reading
=================
You have two ways to read LAS files with pylas.

The easiest one is using the :func:`pylas.read` function.
This function will read everything in the file (header, VLRs, point records, ...) and return an object
that you can use to access the data.

.. code:: python

    import pylas

    las = pylas.read('somefile.las')
    print(np.unique(las.classification))
pylas can also :func:`pylas.open` files, reading just the header and VLRs but not the points; this is useful
if you need metadata information that is contained in the header.

.. code:: python

    import s3fs
    import pylas

    fs = s3fs.S3FileSystem()
    with fs.open('my-bucket/some_file.las', 'rb') as f:
        # read only the header to decide whether to load the whole file
        with pylas.open(f, closefd=False) as las_file:
            point_count = las_file.header.point_count
        f.seek(0)
        if point_count < 100_000_000:
            las = pylas.read(f)
Sometimes files are too big to be read entirely and fit into your RAM.
The object returned by the :func:`pylas.open` function, a :class:`pylas.lasreader.LasReader`,
can also be used to read points chunk by chunk, which allows you to do some
processing on large files (splitting, filtering, etc.).

.. code:: python

    import pylas

    with pylas.open("some_big_file.laz") as f:
        for points in f.chunk_iterator(1_000_000):
            do_something_with(points)
Converting
@@ -50,6 +70,20 @@ To be able to write a las file you will need a :class:`pylas.lasdatas.base.LasBase`
You obtain this type of object by using one of the functions above;
use its method :meth:`pylas.lasdatas.base.LasBase.write` to write to a file or a stream.
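
For example, a minimal sketch (``somefile.las`` is a placeholder name; ``do_compress``
is the same parameter the ``buffered.py`` script of this commit passes to ``write``):

.. code:: python

    import pylas

    las = pylas.read('somefile.las')
    las.write('somefile.laz', do_compress=True)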


Similarly to :class:`pylas.lasreader.LasReader`, there is a way to write a file
chunk by chunk.


.. code:: python

    import pylas

    with pylas.open("some_big_file.laz") as f:
        with pylas.open("grounds.laz", mode="w", header=f.header) as writer:
            for points in f.chunk_iterator(1_234_567):
                writer.write_points(points[points.classification == 2])
.. _accessing_header:
Accessing the file header
10 changes: 7 additions & 3 deletions pylas/__init__.py
@@ -1,16 +1,20 @@
__version__ = "0.4.2"

import logging

from . import errors, vlrs
from .evlrs import EVLR
from .headers import HeaderFactory
from .laswriter import LasWriter
from .lib import LazBackend
from .lib import convert, create_from_header
from .lib import create_las as create
from .lib import merge_las as merge
from .lib import mmap_las as mmap
from .lib import open_las as open
from .lib import read_las as read
from .point import PointFormat
from .point.dims import supported_point_formats, supported_versions
from .point.format import lost_dimensions

logging.getLogger(__name__).addHandler(logging.NullHandler())
144 changes: 62 additions & 82 deletions pylas/compression.py
@@ -4,9 +4,9 @@
There are also functions to use Laszip (meant to be used as a fallback)
"""
import enum
import os
import subprocess
from enum import Enum, auto
from typing import Tuple

import numpy as np

@@ -16,12 +16,10 @@

try:
    import lazperf
except ModuleNotFoundError:
    HAS_LAZPERF = False
else:
    HAS_LAZPERF = True


def raise_if_no_lazperf():
@@ -33,6 +31,51 @@ def raise_if_no_lazperf():
        )


class LazBackend(enum.Enum):
    """Supported backends for reading and writing LAS/LAZ"""

    # type_hint = Union[LazBackend, Iterable[LazBackend]]

    LazrsParallel = 0
    Lazrs = 1
    Laszip = 2  # laszip executable, used through a Popen

    def is_available(self) -> bool:
        """Returns true if the backend is available"""
        if self == LazBackend.Lazrs or self == LazBackend.LazrsParallel:
            try:
                import lazrs
            except ModuleNotFoundError:
                return False
            else:
                return True
        elif self == LazBackend.Laszip:
            try:
                find_laszip_executable()
            except FileNotFoundError:
                return False
            else:
                return True
        else:
            return False

    @staticmethod
    def detect_available() -> Tuple["LazBackend", ...]:
        """Returns a tuple containing the available backends in the current
        python environment
        """
        available_backends = []

        if LazBackend.LazrsParallel.is_available():
            available_backends.append(LazBackend.LazrsParallel)
            available_backends.append(LazBackend.Lazrs)

        if LazBackend.Laszip.is_available():
            available_backends.append(LazBackend.Laszip)

        return tuple(available_backends)
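
# Example usage (editor's sketch, not part of the committed file): detect the
# available backends and hand them to `pylas.open`, mirroring what the
# `buffered.py` script in this commit does with its --ilaz/--olaz options.
#
#   import pylas
#
#   backends = pylas.LazBackend.detect_available()
#   if not backends:
#       raise RuntimeError("No LAZ backend found; install lazrs or laszip")
#   with pylas.open("some_file.laz", laz_backends=backends) as reader:
#       for points in reader.chunk_iterator(1_000_000):
#           ...  # process each chunk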


def is_point_format_compressed(point_format_id):
    compression_bit_7 = (point_format_id & 0x80) >> 7
    compression_bit_6 = (point_format_id & 0x40) >> 6
@@ -42,14 +85,16 @@


def compressed_id_to_uncompressed(point_format_id):
    return point_format_id & 0x3F


def uncompressed_id_to_compressed(point_format_id):
    return (2 ** 7) | point_format_id
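
# Worked example (editor's note): for point format 3,
# uncompressed_id_to_compressed(3) == (2 ** 7) | 3 == 0b10000011 == 131,
# and compressed_id_to_uncompressed(131) == 131 & 0x3F == 3;
# bit 7 is the compression flag that is_point_format_compressed tests.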


def lazrs_decompress_buffer(
    compressed_buffer, point_size, point_count, laszip_vlr, parallel=True
):
    try:
        import lazrs
    except Exception as e:
@@ -61,7 +106,9 @@ def lazrs_decompress_buffer(compressed_buffer, point_size, point_count, laszip_v

        point_decompressed = np.zeros(point_count * point_size, np.uint8)

        lazrs.decompress_points(
            point_compressed, vlr_data, point_decompressed, parallel
        )
    except lazrs.LazrsError as e:
        raise LazError("lazrs error: {}".format(e)) from e
    else:
@@ -76,12 +123,11 @@ def lazrs_compress_points(points_data, parallel=True):

    try:
        vlr = lazrs.LazVlr.new_for_compression(
            points_data.point_format.id, points_data.point_format.num_extra_bytes
        )

        compressed_data = lazrs.compress_points(
            vlr, np.frombuffer(points_data.array, np.uint8), parallel
        )
    except lazrs.LazrsError as e:
        raise LazError("lazrs error: {}".format(e)) from e
@@ -96,16 +142,15 @@ def lazperf_decompress_buffer(compressed_buffer, point_size, point_count, laszip
        point_compressed = np.frombuffer(compressed_buffer, dtype=np.uint8)

        vlr_data = np.frombuffer(laszip_vlr.record_data, dtype=np.uint8)
        decompressor = lazperf.VLRDecompressor(point_compressed, point_size, vlr_data)

        point_uncompressed = decompressor.decompress_points(point_count)

        return point_uncompressed
    except RuntimeError as e:
        raise LazError("lazperf error: {}".format(e)) from e


def lazperf_create_laz_vlr(points_record):
    raise_if_no_lazperf()
    try:
@@ -161,68 +206,3 @@ def find_laszip_executable():

    else:
        raise FileNotFoundError("Could not find laszip executable")


class LasZipProcess:
    class Actions(Enum):
        Compress = auto()
        Decompress = auto()

    def __init__(self, action, stdin=subprocess.PIPE, stdout=subprocess.PIPE):
        """Creates a Popen to the laszip executable.

        This tries to be a thin wrapper over
        https://docs.python.org/fr/3/library/subprocess.html#subprocess.Popen

        Valid inputs for `stdin` and `stdout` are file objects supporting
        the fileno() method, for example files opened with `open`.

        The usage is a bit tricky; see the sketch after this class.
        """
        laszip_binary = find_laszip_executable()

        if action == LasZipProcess.Actions.Decompress:
            out_t = "-olas"
        elif action == LasZipProcess.Actions.Compress:
            out_t = "-olaz"
        else:
            raise ValueError("Invalid Action")

        self.prc = subprocess.Popen(
            [laszip_binary, "-stdin", out_t, "-stdout"],
            stdin=stdin,
            stdout=stdout,
            stderr=subprocess.PIPE,
        )

    @property
    def stdin(self):
        return self.prc.stdin

    @property
    def stdout(self):
        return self.prc.stdout

    def wait(self):
        return self.prc.wait()

    def communicate(self):
        stdout_data, stderr_data = self.prc.communicate()
        self.raise_if_bad_err_code(stderr_data.decode())
        return stdout_data

    def raise_if_bad_err_code(self, error_msg=None):
        if error_msg is None:
            error_msg = self.prc.stderr.read().decode()
        if self.prc.returncode != 0:
            raise RuntimeError(
                "Laszip failed with error code {}\n\t{}".format(
                    self.prc.returncode, "\n\t".join(error_msg.splitlines())
                )
            )

    def wait_until_finished(self):
        self.stdin.close()
        self.prc.wait()
        self.raise_if_bad_err_code(self.prc.stderr.read().decode())
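
# Example usage (editor's sketch, not part of this commit; buffers the whole
# file in memory, so only suitable for reasonably small files):
#
#   with open("points.las", "rb") as fin:
#       process = LasZipProcess(LasZipProcess.Actions.Compress)
#       process.stdin.write(fin.read())
#       laz_bytes = process.communicate()  # also checks laszip's exit status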
