Skip to content

Commit

Permalink
add histogram LGDO
Browse files Browse the repository at this point in the history
  • Loading branch information
ManuelHu committed Jul 10, 2024
1 parent 5fbe2f2 commit e5ee7fd
Show file tree
Hide file tree
Showing 11 changed files with 320 additions and 1 deletion.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ dependencies = [
"colorlog",
"h5py>=3.2",
"hdf5plugin",
"hist",
"numba!=0.53.*,!=0.54.*",
"numexpr",
"numpy>=1.21",
Expand Down
2 changes: 2 additions & 0 deletions src/lgdo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
ArrayOfEncodedEqualSizedArrays,
ArrayOfEqualSizedArrays,
FixedSizeArray,
Histogram,
Scalar,
Struct,
Table,
Expand All @@ -63,6 +64,7 @@
"ArrayOfEqualSizedArrays",
"ArrayOfEncodedEqualSizedArrays",
"FixedSizeArray",
"Histogram",
"LGDO",
"Scalar",
"Struct",
Expand Down
2 changes: 2 additions & 0 deletions src/lgdo/lh5/_serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
_h5_read_ndarray,
)
from .read.composite import (
_h5_read_histogram,
_h5_read_lgdo,
_h5_read_struct,
_h5_read_table,
Expand All @@ -32,6 +33,7 @@
"_h5_read_array_of_equalsized_arrays",
"_h5_read_struct",
"_h5_read_table",
"_h5_read_histogram",
"_h5_read_scalar",
"_h5_read_array_of_encoded_equalsized_arrays",
"_h5_read_vector_of_encoded_vectors",
Expand Down
42 changes: 42 additions & 0 deletions src/lgdo/lh5/_serializers/read/composite.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
ArrayOfEncodedEqualSizedArrays,
ArrayOfEqualSizedArrays,
FixedSizeArray,
Histogram,
Scalar,
Struct,
Table,
Expand Down Expand Up @@ -168,6 +169,17 @@ def _h5_read_lgdo(
decompress=decompress,
)

if lgdotype is Histogram:
return _h5_read_histogram(
h5o,
start_row=start_row,
n_rows=n_rows,
idx=idx,
use_h5idx=use_h5idx,
field_mask=field_mask,
decompress=decompress,
)

if lgdotype is ArrayOfEncodedEqualSizedArrays:
return _h5_read_array_of_encoded_equalsized_arrays(
h5o,
Expand Down Expand Up @@ -385,3 +397,33 @@ def _h5_read_table(
utils.check_obj_buf_attrs(obj_buf.attrs, attrs, h5g)

return obj_buf, n_rows_read


def _h5_read_histogram(
h5g,
start_row=0,
n_rows=sys.maxsize,
idx=None,
use_h5idx=False,
field_mask=None,
decompress=True,
):
struct, n_rows_read = _h5_read_struct(
h5g,
start_row,
n_rows,
idx,
use_h5idx,
field_mask,
decompress,
)
isdensity = struct.isdensity.value
binning = [
(a.binedges.first, a.binedges.last, a.binedges.step, a.closedleft)
for _, a in struct.binning.items()
]
binning = [(v.value for v in b) for b in binning]
weights = struct.weights.view_as("np")
histogram = Histogram(weights, binning, isdensity)

return histogram, n_rows_read
6 changes: 5 additions & 1 deletion src/lgdo/lh5/_serializers/write/composite.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,12 @@ def _h5_write_lgdo(
msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
raise LH5EncodeError(msg, lh5_file, group, name)

# struct or table or waveform table
# struct, table, waveform table or histogram.
if isinstance(obj, types.Struct):
if isinstance(obj, types.Histogram) and wo_mode not in ["w", "of"]:
msg = f"can't append-write histogram in wo_mode '{wo_mode}'"
raise LH5EncodeError(msg, lh5_file, group, name)

return _h5_write_struct(
obj,
name,
Expand Down
1 change: 1 addition & 0 deletions src/lgdo/lh5/datatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
lgdo.ArrayOfEncodedEqualSizedArrays,
r"^array_of_encoded_equalsized_arrays<1,1>\{.+\}$",
),
(lgdo.Histogram, r"^struct\{binning,weights,isdensity\}$"),
(lgdo.Struct, r"^struct\{.*\}$"),
(lgdo.Table, r"^table\{.*\}$"),
(lgdo.FixedSizeArray, r"^fixedsize_array<\d+>\{.+\}$"),
Expand Down
1 change: 1 addition & 0 deletions src/lgdo/lh5_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
ArrayOfEncodedEqualSizedArrays, # noqa: F401
ArrayOfEqualSizedArrays, # noqa: F401
FixedSizeArray, # noqa: F401
Histogram, # noqa: F401
Scalar,
Struct,
Table, # noqa: F401
Expand Down
2 changes: 2 additions & 0 deletions src/lgdo/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
from .encoded import ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors
from .fixedsizearray import FixedSizeArray
from .histogram import Histogram
from .lgdo import LGDO
from .scalar import Scalar
from .struct import Struct
Expand All @@ -18,6 +19,7 @@
"ArrayOfEqualSizedArrays",
"ArrayOfEncodedEqualSizedArrays",
"FixedSizeArray",
"Histogram",
"LGDO",
"Scalar",
"Struct",
Expand Down
239 changes: 239 additions & 0 deletions src/lgdo/types/histogram.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
from __future__ import annotations

from typing import Any, Self

import hist
import numpy as np

from .array import Array
from .lgdo import LGDO
from .scalar import Scalar
from .struct import Struct


class HistogramAxis(Struct):
def __init__(
self, first: float, last: float, step: float, closedleft: bool = True
) -> None:
"""
Parameters
----------
first
left edge of the leftmost bin
last
right edge of the rightmost bin
step
step size (width of each bin)
closedleft
if True, the bin intervals are left-closed :math:`[a,b)`;
if False, intervals are right-closed :math:`(a,b]`.
"""
super().__init__(
{
"binedges": Struct(
{"first": Scalar(first), "last": Scalar(last), "step": Scalar(step)}
),
"closedleft": Scalar(closedleft),
},
)

@classmethod
def from_edges(cls, edges: np.ndarray) -> Self:
edge_diff = np.diff(edges)
if np.any(~np.isclose(edge_diff, edge_diff[0])):
msg = "only evenly distributed edges can be converted"
raise ValueError(msg)
return cls(edges[0], edges[-1], edge_diff[0], True)

@property
def first(self) -> float:
return self["binedges"]["first"].value

@property
def last(self) -> float:
return self["binedges"]["last"].value

@property
def step(self) -> float:
return self["binedges"]["step"].value

@property
def closedleft(self) -> bool:
return self["closedleft"].value

def __str__(self) -> str:
return (
f"first={self.first}, last={self.last}, step={self.step}, "
+ "closedleft={self.closedleft}"
)


class HistogramBinning(Struct):
def __init__(self, axes: list[HistogramAxis]) -> None:
super().__init__({f"axes_{i}": a for i, a in enumerate(axes)})


class Histogram(Struct):
def __init__(
self,
weights: hist.Hist | np.ndarray,
binning: None
| list[HistogramAxis]
| list[np.ndarray]
| list[tuple[float, float, float, bool]] = None,
isdensity: bool = False,
attrs: dict[str, Any] | None = None,
) -> None:
"""A special struct to contain histogrammed data.
Parameters
----------
weights
An :class:`numpy.ndarray` to be used for this object's internal
array. Note: the array is used directly, not copied; or a :class:`hist.Hist`
object, that will be copied and not used directly.
binning
* has to by None if a :class:`hist.Hist` has been passed as ``weights``
* can be a list of pre-initialized :class:`HistogramAxes`
* can be a list of tuples, representing arguments to :meth:`HistogramAxes.__init__()`
* can be a list of numpy arrays, as returned by :func:`numpy.histogramdd`.
"""
if isinstance(weights, hist.Hist):
if binning is not None:
msg = "not allowed to pass custom binning if constructing from hist.Hist instance"
raise ValueError(msg)
if isdensity:
msg = "not allowed to pass isdensity=True if constructing from hist.Hist instance"
raise ValueError(msg)

w = Array(weights.view())
b = []
for ax in weights.axes:
if not isinstance(ax, hist.axis.Regular):
msg = "only regular axes of hist.Hist can be converted"
raise ValueError(msg)
b.append(HistogramAxis.from_edges(ax.edges))
b = HistogramBinning(b)
else:
if binning is None:
msg = "need to also pass binning if passing histogram as array"
raise ValueError(msg)
w = Array(weights)

if all(isinstance(ax, HistogramAxis) for ax in binning):
b = binning
elif all(isinstance(ax, np.ndarray) for ax in binning):
b = [HistogramAxis.from_edges(ax) for ax in binning]
elif all(isinstance(ax, tuple) for ax in binning):
b = [HistogramAxis(*ax) for ax in binning]
else:
msg = "invalid binning object passed"
raise ValueError(msg)

if len(binning) != len(weights.shape):
msg = "binning and weight dimensions do not match"
raise ValueError(msg)
b = HistogramBinning(b)

super().__init__(
{"binning": b, "weights": w, "isdensity": Scalar(isdensity)},
attrs,
)

@property
def isdensity(self) -> bool:
return self["isdensity"].value

@property
def weights(self) -> Array:
return self["weights"]

@property
def binning(self) -> HistogramBinning:
return self["binning"]

def __getattr__(self, name: str) -> None:
raise AttributeError(name)

def __setitem__(self, name: str, obj: LGDO) -> None:
raise AttributeError(name)

def update_datatype(self) -> None:
self.attrs["datatype"] = self.form_datatype()

def __str__(self) -> str:
thr_orig = np.get_printoptions()["threshold"]
np.set_printoptions(threshold=8)

string = "{\n"
for k, v in self.binning.items():
string += f" '{k}': {v},\n"
string += "}"

attrs = self.getattrs()
if attrs:
string += f" with attrs={attrs}"

np.set_printoptions(threshold=thr_orig)

return string

def view_as(
self,
library: str,
) -> tuple[np.ndarray] | hist.Hist:
r"""View the Histogram data as a third-party format data structure.
This is typically a zero-copy or nearly zero-copy operation.
Supported third-party formats are:
- ``np``: returns a tuple of binning and an :class:`np.ndarray`
- ``hist``: returns an :class:`hist.Hist`
Notes
-----
For the return value of the ``np`` output type, see :func:`numpy.histogramdd`.
Parameters
----------
library
format of the returned data view.
See Also
--------
.LGDO.view_as
"""
if library == "hist":
if self.isdensity:
msg = "hist.Hist cannot represent density histograms"
raise ValueError(msg)

hist_axes = []
for _, a in self.binning.items():
bins = (a.last - a.first) / a.step
assert np.isclose(bins, int(bins))
hist_axes.append(
hist.axis.Regular(
bins=int(bins),
start=a.first,
stop=a.last,
underflow=False,
overflow=False,
)
)

hist_hist = hist.Hist(*hist_axes)
hist_hist[:] = self.weights.view_as("np")
return hist_hist

if library == "np":
edges = []
for _, a in self.binning.items():
bins = (a.last - a.first) / a.step
assert np.isclose(bins, int(bins))
edges.append(np.linspace(a.start, a.last, int(bins)))
return self.weights.view_as("np"), tuple(edges)

msg = f"{library!r} is not a supported third-party format."
raise TypeError(msg)
1 change: 1 addition & 0 deletions tests/lh5/test_lh5_datatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def test_datatype2lgdo():
assert d("struct{a,b,c,d}") == types.Struct
assert d("struct{}") == types.Struct
assert d("table{a,b,c,d}") == types.Table
assert d("struct{binning,weights,isdensity}") == types.Histogram


def test_utils():
Expand Down
Loading

0 comments on commit e5ee7fd

Please sign in to comment.