Merge branch 'feature/rework-point-format'

tmontaigu · Sep 30, 2020 · 4bfe4d7 · 4bfe4d7
2 parents 8a3250f + 8b027ad
commit 4bfe4d7
Show file tree

Hide file tree

Showing 14 changed files with 304 additions and 187 deletions.
diff --git a/docs/basic.rst b/docs/basic.rst
@@ -121,32 +121,52 @@ To access point records using the dimension name, you have 2 options:
 >>> np.all(las.user_data == las['user_data'])
 True
 
+Point Format
+------------
+
 The dimensions available in a file are dictated by the point format id.
 The tables in the introduction section contain the list of dimensions for each of the
 point formats.
-To get the point format of a file you have to access it through the points_data member:
+To get the point format of a file you have to access it through the las object:
 
->>> point_format = las.points_data.point_format
+>>> point_format = las.point_format
 >>> point_format
-<PointFormat(3)>
+<PointFormat(3, 0 bytes of extra dims)>
 >>> point_format.id
 3
 
-If you don't want to rember the dimensions for each point format,
+If you don't want to remember the dimensions for each point format,
 you can list the dimensions available in the file you read like this:
 
->>> point_format.dimension_names
-('X', 'Y', 'Z', 'intensity', 'return_number', 'number_of_returns', 'scan_direction_flag', 'edge_of_flight_line', 'classification', 'synthetic', 'key_point', 'withheld', 'scan_angle_rank', 'user_data', 'point_source_id', 'gps_time', 'red', 'green', 'blue')
+>>> list(point_format.dimension_names)
+['X', 'Y', 'Z', 'intensity', 'return_number', 'number_of_returns', 'scan_direction_flag', 'edge_of_flight_line', 'classification', 'synthetic', 'key_point', 'withheld', 'scan_angle_rank', 'user_data', 'point_source_id', 'gps_time', 'red', 'green', 'blue']
 
 This gives you all the dimension names, including extra dimensions if any.
 If you wish to get only the standard or only the extra dimension names, the point format can give them to you:
 
->>> point_format.extra_dimension_names
+>>> list(point_format.standard_dimension_names)
+['X', 'Y', 'Z', 'intensity', 'return_number', 'number_of_returns', 'scan_direction_flag', 'edge_of_flight_line', 'classification', 'synthetic', 'key_point', 'withheld', 'scan_angle_rank', 'user_data', 'point_source_id', 'gps_time', 'red', 'green', 'blue']
+>>> list(point_format.extra_dimension_names)
 []
 >>> las = pylas.read('pylastests/extra.laz')
->>> las.points_data.point_format.extra_dimension_names
+>>> list(las.point_format.extra_dimension_names)
 ['Colors', 'Reserved', 'Flags', 'Intensity', 'Time']
 
+You can also get more information about a dimension by indexing the point format:
+
+>>> point_format[3].name
+'intensity'
+>>> point_format[3].num_bits
+16
+>>> point_format[3].kind
+<DimensionKind.UnsignedInteger: 1>
+>>> point_format[3].max
+65535
+
+
+
+
+
 .. _manipulating_vlrs:
 
 Manipulating VLRs
@@ -159,8 +179,7 @@ To access the VLRs stored in a file, simply access the `vlr` member of the las o
 [<ExtraBytesVlr(extra bytes structs: 5)>]
 
 >>> with pylas.open('pylastests/extrabytes.las') as f:
-...     vlr_list = f.read_vlrs()
->>> vlr_list
+...     f.vlrs
 [<ExtraBytesVlr(extra bytes structs: 5)>]
 
 

diff --git a/docs/lessbasic.rst b/docs/lessbasic.rst
@@ -125,6 +125,7 @@ will be able to automatically parse the VLR when reading the file & write it whe
 ...     def __repr__(self):
 ...         return "<MyCustomVLR>"
 
+>>> import numpy as np
 >>> cvlr = CustomVLR()
 >>> cvlr.numbers
 []
@@ -133,6 +134,7 @@ will be able to automatically parse the VLR when reading the file & write it whe
 >>> las.vlrs.append(cvlr)
 >>> las.vlrs
 [<MyCustomVLR>]
+>>> las.x = np.array([1.0, 2.0])
 >>> las = pylas.lib.write_then_read_again(las)
 >>> las.vlrs
 [<MyCustomVLR>]

diff --git a/pylas/__init__.py b/pylas/__init__.py
@@ -14,7 +14,7 @@
 from .lib import mmap_las as mmap
 from .lib import open_las as open
 from .lib import read_las as read
-from .point import PointFormat
+from .point import PointFormat, DimensionKind, DimensionInfo
 from .point.dims import supported_point_formats, supported_versions
 from .point.format import lost_dimensions
 

diff --git a/pylas/lasdatas/base.py b/pylas/lasdatas/base.py
@@ -157,7 +157,7 @@ def add_extra_dim(self, name, type, description=""):
             self.vlrs.append(extra_bytes_vlr)
         finally:
             extra_bytes_vlr.extra_bytes_structs.append(extra_byte)
-            self.points.add_extra_dims([(name, type)])
+            self.points.add_extra_dim(name, type)
 
     def update_header(self):
         """Update the information stored in the header

diff --git a/pylas/laswriter.py b/pylas/laswriter.py
@@ -107,8 +107,9 @@ def write(self, points: PointRecord) -> None:
                 self.vlrs.index("ExtraBytesVlr")
             except ValueError:
                 extra_bytes_vlr = ExtraBytesVlr()
-                for name, type_str in self.point_format.extra_dims:
-                    name = name.replace(" ", "_")
+                for dim_info in self.point_format.extra_dimensions:
+                    name = dim_info.name.replace(" ", "_")
+                    type_str = dim_info.type_str()
                     if type_str.endswith("u1"):
                         extra_byte = ExtraBytesStruct(
                             data_type=0,

diff --git a/pylas/lib.py b/pylas/lib.py
@@ -327,8 +327,9 @@ def convert(source_las, *, point_format_id=None, file_version=None):
     header.point_format_id = point_format_id
 
     point_format = PointFormat(
-        point_format_id, source_las.points.point_format.extra_dims
+        point_format_id
     )
+    point_format.dimensions.extend(source_las.point_format.extra_dimensions)
     points = record.PackedPointRecord.from_point_record(
         source_las.points, point_format
     )

diff --git a/pylas/point/__init__.py b/pylas/point/__init__.py
@@ -1 +1,2 @@
 from .format import PointFormat
+from .dims import DimensionKind, DimensionInfo
diff --git a/pylas/point/dims.py b/pylas/point/dims.py
@@ -2,15 +2,30 @@
 the mapping between dimension names and their type, mapping between point format and
 compatible file version
 """
+import itertools
 import operator
-from collections import namedtuple
+from collections import UserDict
+from enum import Enum
+from typing import NamedTuple, Optional, Dict, Tuple
 
 import numpy as np
 
 from . import packing
 from .. import errors
 
 
+class PointFormatDict(UserDict):
+
+    def __init__(self, wrapped_dict):
+        super().__init__(wrapped_dict)
+
+    def __getitem__(self, key):
+        try:
+            return self.data[key]
+        except KeyError:
+            raise errors.PointFormatNotSupported(key) from None
+
+
 def _point_format_to_dtype(point_format, dimensions):
     """build the numpy.dtype for a point format
 
@@ -83,7 +98,7 @@ def _build_unpacked_point_formats_dtypes(
     "z_t": ("z_t", "f4"),
     # Las 1.4
     "classification_flags": ("classification_flags", "u1"),
-    "scan_angle": ("scan_angle_rank", "i2"),
+    "scan_angle": ("scan_angle", "i2"),
     "classification": ("classification", "u1"),
     "nir": ("nir", "u2"),
 }
@@ -126,7 +141,7 @@ def _build_unpacked_point_formats_dtypes(
 
 COLOR_FIELDS_NAMES = ("red", "green", "blue")
 
-POINT_FORMAT_DIMENSIONS = {
+POINT_FORMAT_DIMENSIONS = PointFormatDict({
     0: POINT_FORMAT_0,
     1: POINT_FORMAT_0 + ("gps_time",),
     2: POINT_FORMAT_0 + COLOR_FIELDS_NAMES,
@@ -138,7 +153,7 @@ def _build_unpacked_point_formats_dtypes(
     8: POINT_FORMAT_6 + COLOR_FIELDS_NAMES + ("nir",),
     9: POINT_FORMAT_6 + WAVEFORM_FIELDS_NAMES,
     10: POINT_FORMAT_6 + COLOR_FIELDS_NAMES + ("nir",) + WAVEFORM_FIELDS_NAMES,
-}
+})
 
 # sub fields of the 'bit_fields' dimension
 RETURN_NUMBER_MASK_0 = 0b00000111
@@ -167,7 +182,13 @@ def _build_unpacked_point_formats_dtypes(
 SCAN_DIRECTION_FLAG_MASK_6 = 0b01000000
 EDGE_OF_FLIGHT_LINE_MASK_6 = 0b10000000
 
-SubField = namedtuple("SubField", ("name", "mask", "type"))
+
+class SubField(NamedTuple):
+    name: str
+    mask: int
+    type: str
+
+
 COMPOSED_FIELDS_0 = {
     "bit_fields": [
         SubField("return_number", RETURN_NUMBER_MASK_0, "u1"),
@@ -199,7 +220,7 @@ def _build_unpacked_point_formats_dtypes(
 }
 
 # Dict giving the composed fields for each point_format_id
-COMPOSED_FIELDS = {
+COMPOSED_FIELDS = PointFormatDict({
     0: COMPOSED_FIELDS_0,
     1: COMPOSED_FIELDS_0,
     2: COMPOSED_FIELDS_0,
@@ -211,7 +232,7 @@ def _build_unpacked_point_formats_dtypes(
     8: COMPOSED_FIELDS_6,
     9: COMPOSED_FIELDS_6,
     10: COMPOSED_FIELDS_6,
-}
+})
 
 VERSION_TO_POINT_FMT = {
     "1.2": (0, 1, 2, 3),
@@ -222,52 +243,123 @@ def _build_unpacked_point_formats_dtypes(
 POINT_FORMATS_DTYPE = _build_point_formats_dtypes(POINT_FORMAT_DIMENSIONS, DIMENSIONS)
 
 # This Dict maps point_format_ids to their dimension names
-ALL_POINT_FORMATS_DIMENSIONS = {**POINT_FORMAT_DIMENSIONS}
+ALL_POINT_FORMATS_DIMENSIONS = PointFormatDict({**POINT_FORMAT_DIMENSIONS})
 # This Dict maps point_format_ids to their numpy.dtype
 # the dtype corresponds to the packed data
-ALL_POINT_FORMATS_DTYPE = {**POINT_FORMATS_DTYPE}
-# This Dict maps point_format_ids to their numpy.dtype
-# the dtype corresponds to the unpacked data
-UNPACKED_POINT_FORMATS_DTYPES = _build_unpacked_point_formats_dtypes(
-    POINT_FORMAT_DIMENSIONS, COMPOSED_FIELDS, DIMENSIONS
-)
+ALL_POINT_FORMATS_DTYPE = PointFormatDict({**POINT_FORMATS_DTYPE})
+
+
+def get_sub_fields_dict(point_format_id: int) -> Dict[str, Tuple[str, SubField]]:
+    sub_fields_dict = {}
+    for composed_dim_name, sub_fields in COMPOSED_FIELDS[point_format_id].items():
+        for sub_field in sub_fields:
+            sub_fields_dict[sub_field.name] = (composed_dim_name, sub_field)
+    return sub_fields_dict
+
+
+class DimensionKind(Enum):
+    SignedInteger = 0
+    UnsignedInteger = 1
+    FloatingPoint = 2
+    BitField = 3
+
+    @classmethod
+    def from_letter(cls, letter: str) -> 'DimensionKind':
+        if letter == 'u':
+            return cls.UnsignedInteger
+        elif letter == 'i':
+            return cls.SignedInteger
+        elif letter == 'f':
+            return cls.FloatingPoint
+        else:
+            raise ValueError(f"Unknown type letter '{letter}'")
+
+    def letter(self) -> Optional[str]:
+        if self == DimensionKind.UnsignedInteger:
+            return "u"
+        elif self == DimensionKind.SignedInteger:
+            return "i"
+        elif self == DimensionKind.FloatingPoint:
+            return "f"
+        else:
+            return None
 
 
-def np_dtype_to_point_format(dtype, unpacked=False):
-    """Tries to find a matching point format id for the input numpy dtype
-    To match, the input dtype has to be 100% equal to a point format dtype
-    so all names & dimensions types must match
+def num_bit_set(n: int) -> int:
+    """Count the number of bits that are set (1) in the number n
 
-    Parameters:
-    ----------
-    dtype : numpy.dtype
-        The input dtype
-    unpacked : bool, optional
-        [description] (the default is False, which [default_description])
+    Brian Kernighan's algorithm
+    """
+    count = 0
+    while n != 0:
+        count += 1
+        n = n & (n - 1)
+    return count
 
-    Raises
-    ------
-    errors.IncompatibleDataFormat
-        If No compatible point format was found
 
-    Returns
-    -------
-    int
-        The compatible point format found
+class DimensionInfo(NamedTuple):
+    """ Tuple that contains information of a dimension
+
     """
+    name: str
+    kind: DimensionKind
+    num_bits: int
+    num_elements: int = 1
+    is_standard: bool = True
+
+    @classmethod
+    def from_type_str(cls, name: str, type_str: str, is_standard: bool = True) -> 'DimensionInfo':
+        first_digits = "".join(itertools.takewhile(lambda l: l.isdigit(), type_str))
+        if first_digits:
+            num_elements = int(first_digits)
+            type_str = type_str[len(first_digits):]
+        else:
+            num_elements = 1
 
-    all_dtypes = (
-        ALL_POINT_FORMATS_DTYPE if not unpacked else UNPACKED_POINT_FORMATS_DTYPES
-    )
-    for format_id, fmt_dtype in all_dtypes.items():
-        if fmt_dtype == dtype:
-            return format_id
+        kind = DimensionKind.from_letter(type_str[0])
+        num_bits = int(type_str[1:]) * 8 * num_elements
 
-    raise errors.IncompatibleDataFormat(
-        "Data type of array is not compatible with any point format (array dtype: {})".format(
-            dtype
-        )
-    )
+        return cls(name, kind, num_bits, num_elements, is_standard)
+
+    @classmethod
+    def from_bitmask(cls, name: str, bit_mask: int, is_standard: bool = False) -> 'DimensionInfo':
+        kind = DimensionKind.BitField
+        bit_size = num_bit_set(bit_mask)
+        return cls(name, kind, bit_size, is_standard=is_standard)
+
+    @property
+    def num_bytes(self) -> int:
+        return int(self.num_bits // 8)
+
+    @property
+    def num_bytes_singular_element(self) -> int:
+        return int(self.num_bits // (8 * self.num_elements))
+
+    @property
+    def max(self):
+        if self.kind == DimensionKind.BitField:
+            return (2 ** self.num_bits) - 1
+        elif self.kind == DimensionKind.FloatingPoint:
+            return np.finfo(self.type_str()).max
+        else:
+            return np.iinfo(self.type_str()).max
+
+    @property
+    def min(self):
+        if self.kind == DimensionKind.BitField or self.kind == DimensionKind.UnsignedInteger:
+            return 0
+        elif self.kind == DimensionKind.FloatingPoint:
+            return np.finfo(self.type_str()).min
+        else:
+            return np.iinfo(self.type_str()).min
+
+    def type_str(self) -> Optional[str]:
+        if self.kind == DimensionKind.BitField:
+            return None
+
+        if self.num_elements == 1:
+            return f"{self.kind.letter()}{self.num_bytes_singular_element}"
+        return f"{self.num_elements}{self.kind.letter()}{self.num_bytes_singular_element}"
 
 
 def size_of_point_format_id(point_format_id):
@@ -509,7 +601,6 @@ def __sub__(self, other):
     def __add__(self, other):
         return ScaledArrayView(self.array + self._remove_scale(other), self.scale, self.offset)
 
-
     def __getitem__(self, item):
         if isinstance(item, int):
             return self._apply_scale(self.array[item])