Skip to content
This repository has been archived by the owner on Aug 18, 2022. It is now read-only.

Commit

Permalink
Merge branch 'feature/rework-point-format'
Browse files Browse the repository at this point in the history
  • Loading branch information
tmontaigu committed Sep 30, 2020
2 parents 8a3250f + 8b027ad commit 4bfe4d7
Show file tree
Hide file tree
Showing 14 changed files with 304 additions and 187 deletions.
39 changes: 29 additions & 10 deletions docs/basic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,32 +121,52 @@ To access point records using the dimension name, you have 2 options:
>>> np.all(las.user_data == las['user_data'])
True

Point Format
------------

The dimensions available in a file are dictated by the point format id.
The tables in the introduction section contain the list of dimensions for each of the
point formats.
To get the point format of a file you have to access it through the points_data member:
To get the point format of a file you have to access it through the las object:

>>> point_format = las.points_data.point_format
>>> point_format = las.point_format
>>> point_format
<PointFormat(3)>
<PointFormat(3, 0 bytes of extra dims)>
>>> point_format.id
3

If you don't want to rember the dimensions for each point format,
If you don't want to remember the dimensions for each point format,
you can access the list of available dimensions in the file you read just like that:

>>> point_format.dimension_names
('X', 'Y', 'Z', 'intensity', 'return_number', 'number_of_returns', 'scan_direction_flag', 'edge_of_flight_line', 'classification', 'synthetic', 'key_point', 'withheld', 'scan_angle_rank', 'user_data', 'point_source_id', 'gps_time', 'red', 'green', 'blue')
>>> list(point_format.dimension_names)
['X', 'Y', 'Z', 'intensity', 'return_number', 'number_of_returns', 'scan_direction_flag', 'edge_of_flight_line', 'classification', 'synthetic', 'key_point', 'withheld', 'scan_angle_rank', 'user_data', 'point_source_id', 'gps_time', 'red', 'green', 'blue']

This gives you all the dimension names, including extra dimensions if any.
If you wish to get only the standard or only the extra dimension names, the point format can give them to you:

>>> point_format.extra_dimension_names
>>> list(point_format.standard_dimension_names)
['X', 'Y', 'Z', 'intensity', 'return_number', 'number_of_returns', 'scan_direction_flag', 'edge_of_flight_line', 'classification', 'synthetic', 'key_point', 'withheld', 'scan_angle_rank', 'user_data', 'point_source_id', 'gps_time', 'red', 'green', 'blue']
>>> list(point_format.extra_dimension_names)
[]
>>> las = pylas.read('pylastests/extra.laz')
>>> las.points_data.point_format.extra_dimension_names
>>> list(las.point_format.extra_dimension_names)
['Colors', 'Reserved', 'Flags', 'Intensity', 'Time']

You can also have more information:

>>> point_format[3].name
'intensity'
>>> point_format[3].num_bits
16
>>> point_format[3].kind
<DimensionKind.UnsignedInteger: 1>
>>> point_format[3].max
65535





.. _manipulating_vlrs:

Manipulating VLRs
Expand All @@ -159,8 +179,7 @@ To access the VLRs stored in a file, simply access the `vlr` member of the las o
[<ExtraBytesVlr(extra bytes structs: 5)>]

>>> with pylas.open('pylastests/extrabytes.las') as f:
... vlr_list = f.read_vlrs()
>>> vlr_list
... f.vlrs
[<ExtraBytesVlr(extra bytes structs: 5)>]


Expand Down
2 changes: 2 additions & 0 deletions docs/lessbasic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ will be able to automatically parse the VLR when reading the file & write it whe
... def __repr__(self):
... return "<MyCustomVLR>"

>>> import numpy as np
>>> cvlr = CustomVLR()
>>> cvlr.numbers
[]
Expand All @@ -133,6 +134,7 @@ will be able to automatically parse the VLR when reading the file & write it whe
>>> las.vlrs.append(cvlr)
>>> las.vlrs
[<MyCustomVLR>]
>>> las.x = np.array([1.0, 2.0])
>>> las = pylas.lib.write_then_read_again(las)
>>> las.vlrs
[<MyCustomVLR>]
Expand Down
2 changes: 1 addition & 1 deletion pylas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .lib import mmap_las as mmap
from .lib import open_las as open
from .lib import read_las as read
from .point import PointFormat
from .point import PointFormat, DimensionKind, DimensionInfo
from .point.dims import supported_point_formats, supported_versions
from .point.format import lost_dimensions

Expand Down
2 changes: 1 addition & 1 deletion pylas/lasdatas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def add_extra_dim(self, name, type, description=""):
self.vlrs.append(extra_bytes_vlr)
finally:
extra_bytes_vlr.extra_bytes_structs.append(extra_byte)
self.points.add_extra_dims([(name, type)])
self.points.add_extra_dim(name, type)

def update_header(self):
"""Update the information stored in the header
Expand Down
5 changes: 3 additions & 2 deletions pylas/laswriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,9 @@ def write(self, points: PointRecord) -> None:
self.vlrs.index("ExtraBytesVlr")
except ValueError:
extra_bytes_vlr = ExtraBytesVlr()
for name, type_str in self.point_format.extra_dims:
name = name.replace(" ", "_")
for dim_info in self.point_format.extra_dimensions:
name = dim_info.name.replace(" ", "_")
type_str = dim_info.type_str()
if type_str.endswith("u1"):
extra_byte = ExtraBytesStruct(
data_type=0,
Expand Down
3 changes: 2 additions & 1 deletion pylas/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,9 @@ def convert(source_las, *, point_format_id=None, file_version=None):
header.point_format_id = point_format_id

point_format = PointFormat(
point_format_id, source_las.points.point_format.extra_dims
point_format_id
)
point_format.dimensions.extend(source_las.point_format.extra_dimensions)
points = record.PackedPointRecord.from_point_record(
source_las.points, point_format
)
Expand Down
1 change: 1 addition & 0 deletions pylas/point/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .format import PointFormat
from .dims import DimensionKind, DimensionInfo
179 changes: 135 additions & 44 deletions pylas/point/dims.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,30 @@
the mapping between dimension names and their type, mapping between point format and
compatible file version
"""
import itertools
import operator
from collections import namedtuple
from collections import UserDict
from enum import Enum
from typing import NamedTuple, Optional, Dict, Tuple

import numpy as np

from . import packing
from .. import errors


class PointFormatDict(UserDict):
    """Dictionary wrapper used for the tables indexed by point format id.

    It behaves like the dictionary it is built from, except that looking up
    a key that is not present raises ``errors.PointFormatNotSupported``
    instead of ``KeyError``.
    """

    def __init__(self, wrapped_dict):
        super().__init__(wrapped_dict)

    def __getitem__(self, key):
        """Return the value stored for ``key``.

        Raises
        ------
        errors.PointFormatNotSupported
            If ``key`` is not in the dictionary.
        """
        try:
            value = self.data[key]
        except KeyError:
            # `from None` hides the internal KeyError from the traceback so
            # that only the domain specific error is reported to the caller.
            raise errors.PointFormatNotSupported(key) from None
        return value


def _point_format_to_dtype(point_format, dimensions):
"""build the numpy.dtype for a point format
Expand Down Expand Up @@ -83,7 +98,7 @@ def _build_unpacked_point_formats_dtypes(
"z_t": ("z_t", "f4"),
# Las 1.4
"classification_flags": ("classification_flags", "u1"),
"scan_angle": ("scan_angle_rank", "i2"),
"scan_angle": ("scan_angle", "i2"),
"classification": ("classification", "u1"),
"nir": ("nir", "u2"),
}
Expand Down Expand Up @@ -126,7 +141,7 @@ def _build_unpacked_point_formats_dtypes(

COLOR_FIELDS_NAMES = ("red", "green", "blue")

POINT_FORMAT_DIMENSIONS = {
POINT_FORMAT_DIMENSIONS = PointFormatDict({
0: POINT_FORMAT_0,
1: POINT_FORMAT_0 + ("gps_time",),
2: POINT_FORMAT_0 + COLOR_FIELDS_NAMES,
Expand All @@ -138,7 +153,7 @@ def _build_unpacked_point_formats_dtypes(
8: POINT_FORMAT_6 + COLOR_FIELDS_NAMES + ("nir",),
9: POINT_FORMAT_6 + WAVEFORM_FIELDS_NAMES,
10: POINT_FORMAT_6 + COLOR_FIELDS_NAMES + ("nir",) + WAVEFORM_FIELDS_NAMES,
}
})

# sub fields of the 'bit_fields' dimension
RETURN_NUMBER_MASK_0 = 0b00000111
Expand Down Expand Up @@ -167,7 +182,13 @@ def _build_unpacked_point_formats_dtypes(
SCAN_DIRECTION_FLAG_MASK_6 = 0b01000000
EDGE_OF_FLIGHT_LINE_MASK_6 = 0b10000000

SubField = namedtuple("SubField", ("name", "mask", "type"))

class SubField(NamedTuple):
    """Description of one sub field packed inside a composed dimension."""

    # Name of the sub field (e.g. "return_number").
    name: str
    # Bit mask selecting the bits of the sub field in the composed dimension.
    mask: int
    # Type string of the sub field (e.g. "u1").
    type: str


COMPOSED_FIELDS_0 = {
"bit_fields": [
SubField("return_number", RETURN_NUMBER_MASK_0, "u1"),
Expand Down Expand Up @@ -199,7 +220,7 @@ def _build_unpacked_point_formats_dtypes(
}

# Dict giving the composed fields for each point_format_id
COMPOSED_FIELDS = {
COMPOSED_FIELDS = PointFormatDict({
0: COMPOSED_FIELDS_0,
1: COMPOSED_FIELDS_0,
2: COMPOSED_FIELDS_0,
Expand All @@ -211,7 +232,7 @@ def _build_unpacked_point_formats_dtypes(
8: COMPOSED_FIELDS_6,
9: COMPOSED_FIELDS_6,
10: COMPOSED_FIELDS_6,
}
})

VERSION_TO_POINT_FMT = {
"1.2": (0, 1, 2, 3),
Expand All @@ -222,52 +243,123 @@ def _build_unpacked_point_formats_dtypes(
POINT_FORMATS_DTYPE = _build_point_formats_dtypes(POINT_FORMAT_DIMENSIONS, DIMENSIONS)

# This Dict maps point_format_ids to their dimensions names
ALL_POINT_FORMATS_DIMENSIONS = {**POINT_FORMAT_DIMENSIONS}
ALL_POINT_FORMATS_DIMENSIONS = PointFormatDict({**POINT_FORMAT_DIMENSIONS})
# This Dict maps point_format_ids to their numpy.dtype
# the dtype corresponds to the de packed data
ALL_POINT_FORMATS_DTYPE = {**POINT_FORMATS_DTYPE}
# This Dict maps point_format_ids to their numpy.dtype
# the dtype corresponds to the unpacked data
UNPACKED_POINT_FORMATS_DTYPES = _build_unpacked_point_formats_dtypes(
POINT_FORMAT_DIMENSIONS, COMPOSED_FIELDS, DIMENSIONS
)
ALL_POINT_FORMATS_DTYPE = PointFormatDict({**POINT_FORMATS_DTYPE})


def get_sub_fields_dict(point_format_id: int) -> Dict[str, Tuple[str, SubField]]:
    """Map each sub field name of a point format to its composed dimension.

    Parameters
    ----------
    point_format_id : int
        Key used to look up the composed fields in ``COMPOSED_FIELDS``.

    Returns
    -------
    dict
        Keys are the sub field names, values are tuples
        ``(composed dimension name, SubField)``.
    """
    return {
        sub_field.name: (composed_dim_name, sub_field)
        for composed_dim_name, sub_fields in COMPOSED_FIELDS[point_format_id].items()
        for sub_field in sub_fields
    }


class DimensionKind(Enum):
    """Kind of data a dimension holds."""

    SignedInteger = 0
    UnsignedInteger = 1
    FloatingPoint = 2
    BitField = 3

    @classmethod
    def from_letter(cls, letter: str) -> 'DimensionKind':
        """Return the kind matching a type letter ('u', 'i' or 'f').

        Raises
        ------
        ValueError
            If the letter is not one of the known type letters.
        """
        if letter == 'u':
            return cls.UnsignedInteger
        if letter == 'i':
            return cls.SignedInteger
        if letter == 'f':
            return cls.FloatingPoint
        raise ValueError(f"Unknown type letter '{letter}'")

    def letter(self) -> Optional[str]:
        """Return the type letter of this kind, or None for bit fields."""
        # Built locally: a dict assigned in the class body would be turned
        # into an enum member.
        letters = {
            DimensionKind.UnsignedInteger: "u",
            DimensionKind.SignedInteger: "i",
            DimensionKind.FloatingPoint: "f",
        }
        return letters.get(self)


def np_dtype_to_point_format(dtype, unpacked=False):
"""Tries to find a matching point format id for the input numpy dtype
To match, the input dtype has to be 100% equal to a point format dtype
so all names & dimensions types must match
def num_bit_set(n: int) -> int:
"""Count the number of bits that are set (1) in the number n
Parameters:
----------
dtype : numpy.dtype
The input dtype
unpacked : bool, optional
[description] (the default is False, which [default_description])
Brian Kernighan's algorithm
"""
count = 0
while n != 0:
count += 1
n = n & (n - 1)
return count

Raises
------
errors.IncompatibleDataFormat
If No compatible point format was found

Returns
-------
int
The compatible point format found
class DimensionInfo(NamedTuple):
""" Tuple that contains information of a dimension
"""
name: str
kind: DimensionKind
num_bits: int
num_elements: int = 1
is_standard: bool = True

@classmethod
def from_type_str(cls, name: str, type_str: str, is_standard: bool = True) -> 'DimensionInfo':
first_digits = "".join(itertools.takewhile(lambda l: l.isdigit(), type_str))
if first_digits:
num_elements = int(first_digits)
type_str = type_str[len(first_digits):]
else:
num_elements = 1

all_dtypes = (
ALL_POINT_FORMATS_DTYPE if not unpacked else UNPACKED_POINT_FORMATS_DTYPES
)
for format_id, fmt_dtype in all_dtypes.items():
if fmt_dtype == dtype:
return format_id
kind = DimensionKind.from_letter(type_str[0])
num_bits = int(type_str[1:]) * 8 * num_elements

raise errors.IncompatibleDataFormat(
"Data type of array is not compatible with any point format (array dtype: {})".format(
dtype
)
)
return cls(name, kind, num_bits, num_elements, is_standard)

@classmethod
def from_bitmask(cls, name: str, bit_mask: int, is_standard: bool = False) -> 'DimensionInfo':
kind = DimensionKind.BitField
bit_size = num_bit_set(bit_mask)
return cls(name, kind, bit_size, is_standard=is_standard)

@property
def num_bytes(self) -> int:
return int(self.num_bits // 8)

@property
def num_bytes_singular_element(self) -> int:
return int(self.num_bits // (8 * self.num_elements))

@property
def max(self):
if self.kind == DimensionKind.BitField:
return (2 ** self.num_bits) - 1
elif self.kind == DimensionKind.FloatingPoint:
return np.finfo(self.type_str()).max
else:
return np.iinfo(self.type_str()).max

@property
def min(self):
if self.kind == DimensionKind.BitField or self.kind == DimensionKind.UnsignedInteger:
return 0
elif self.kind == DimensionKind.FloatingPoint:
return np.finfo(self.type_str()).min
else:
return np.iinfo(self.type_str()).min

def type_str(self) -> Optional[str]:
if self.kind == DimensionKind.BitField:
return None

if self.num_elements == 1:
return f"{self.kind.letter()}{self.num_bytes_singular_element}"
return f"{self.num_elements}{self.kind.letter()}{self.num_bytes_singular_element}"


def size_of_point_format_id(point_format_id):
Expand Down Expand Up @@ -509,7 +601,6 @@ def __sub__(self, other):
def __add__(self, other):
return ScaledArrayView(self.array + self._remove_scale(other), self.scale, self.offset)


def __getitem__(self, item):
if isinstance(item, int):
return self._apply_scale(self.array[item])
Expand Down

0 comments on commit 4bfe4d7

Please sign in to comment.