Skip to content

Commit

Permalink
Utils for discovering attached data and dropping in-memory data (#1361)
Browse files Browse the repository at this point in the history
  • Loading branch information
pzelasko committed Jun 24, 2024
1 parent d1f94c0 commit e3bed73
Show file tree
Hide file tree
Showing 10 changed files with 274 additions and 9 deletions.
20 changes: 19 additions & 1 deletion lhotse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import numpy as np

from lhotse.utils import Pathlike, Seconds, fastcopy, ifnone
from lhotse.utils import Pathlike, Seconds, fastcopy


@dataclass
Expand Down Expand Up @@ -51,6 +51,16 @@ class Array:
def ndim(self) -> int:
return len(self.shape)

@property
def is_in_memory(self) -> bool:
    """Whether ``self.storage_type`` names an in-memory storage backend."""
    # Function-scope import; presumably avoids a circular import between
    # lhotse.array and lhotse.features -- TODO confirm.
    from lhotse.features.io import is_in_memory

    return is_in_memory(self.storage_type)

@property
def is_placeholder(self) -> bool:
    """Whether this manifest is a placeholder, i.e. its ``storage_type`` is ``"shar"``."""
    return self.storage_type == "shar"

def to_dict(self) -> dict:
    """Return the dataclass fields of this object as a plain ``dict``."""
    return asdict(self)

Expand Down Expand Up @@ -157,6 +167,14 @@ class TemporalArray:
# the shape, temporal_dim, and frame_shift.
start: Seconds

@property
def is_in_memory(self) -> bool:
    """Whether the wrapped ``self.array`` reports being held in memory."""
    return self.array.is_in_memory

@property
def is_placeholder(self) -> bool:
    """Whether the wrapped ``self.array`` reports being a placeholder."""
    return self.array.is_placeholder

@property
def shape(self) -> List[int]:
return self.array.shape
Expand Down
8 changes: 8 additions & 0 deletions lhotse/audio/recording.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,14 @@ def _video_source(self) -> Optional[AudioSource]:
return s
return None

@property
def is_in_memory(self) -> bool:
    """Whether at least one of ``self.sources`` has type ``"memory"``."""
    return any(s.type == "memory" for s in self.sources)

@property
def is_placeholder(self) -> bool:
    """Whether at least one of ``self.sources`` has type ``"shar"``."""
    return any(s.type == "shar" for s in self.sources)

@property
def num_channels(self) -> int:
return len(self.channel_ids)
Expand Down
3 changes: 3 additions & 0 deletions lhotse/cut/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ class Cut:
drop_features: Callable
drop_recording: Callable
drop_supervisions: Callable
drop_alignments: Callable
drop_in_memory_data: Callable
iter_data: Callable
truncate: Callable
pad: Callable
extend_by: Callable
Expand Down
68 changes: 67 additions & 1 deletion lhotse/cut/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,23 @@
from dataclasses import dataclass, field
from decimal import ROUND_DOWN
from math import isclose
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
from typing import (
Any,
Callable,
Dict,
Generator,
Iterable,
List,
Optional,
Tuple,
Union,
)

import numpy as np
import torch
from intervaltree import IntervalTree

from lhotse.array import Array, TemporalArray
from lhotse.audio import Recording, VideoInfo
from lhotse.augmentation import AugmentFn
from lhotse.custom import CustomFieldMixin
Expand Down Expand Up @@ -81,6 +92,32 @@ def to_dict(self) -> dict:
d["custom"][k] = v.to_dict()
return {**d, "type": type(self).__name__}

def iter_data(
    self,
) -> Generator[
    Tuple[str, Union[Recording, Features, Array, TemporalArray]], None, None
]:
    """
    Iterate over each data piece attached to this cut.
    Returns a generator yielding tuples of ``(key, manifest)``, where
    ``key`` is the name of the attribute under which ``manifest`` is found.
    ``manifest`` is of type :class:`~lhotse.Recording`, :class:`~lhotse.Features`,
    :class:`~lhotse.TemporalArray`, or :class:`~lhotse.Array`.
    For example, if ``key`` is ``recording``, then ``manifest`` is ``self.recording``.

    The recording is yielded first (if present), then the features (if present),
    then every entry of ``self.custom`` whose value is one of the types above.
    """
    if self.has_recording:
        yield "recording", self.recording
    if self.has_features:
        yield "features", self.features
    # ``custom`` may be None; other kinds of custom values are skipped.
    for k, v in (self.custom or {}).items():
        if isinstance(v, (Recording, Features, Array, TemporalArray)):
            yield k, v

@property
def is_in_memory(self) -> bool:
    """Whether any data piece yielded by ``self.iter_data()`` is held in memory."""
    return any(manifest.is_in_memory for _, manifest in self.iter_data())

@property
def recording_id(self) -> str:
return self.recording.id if self.has_recording else self.features.recording_id
Expand Down Expand Up @@ -327,6 +364,35 @@ def drop_alignments(self) -> "DataCut":
self, supervisions=[fastcopy(s, alignment={}) for s in self.supervisions]
)

def drop_in_memory_data(self) -> "DataCut":
    """
    Return a copy of the current :class:`.DataCut`, detached from any in-memory data.
    The manifests for in-memory data are converted into placeholders that can still be looked up for
    metadata, but will fail on attempts to load the data.

    The recording, the features, and every data manifest stored in ``custom``
    are checked; anything that is not in memory is carried over unchanged.
    """
    from lhotse.shar.utils import to_shar_placeholder

    data_types = (Recording, Features, Array, TemporalArray)

    custom = None
    if self.custom is not None:
        # Build a new dict so the original cut's ``custom`` is never mutated.
        custom = {
            k: to_shar_placeholder(v)
            if isinstance(v, data_types) and v.is_in_memory
            else v
            for k, v in self.custom.items()
        }
    return fastcopy(
        self,
        recording=to_shar_placeholder(self.recording)
        if self.has_recording and self.recording.is_in_memory
        else self.recording,
        features=to_shar_placeholder(self.features)
        if self.has_features and self.features.is_in_memory
        else self.features,
        custom=custom,
    )

def fill_supervision(
self, add_empty: bool = True, shrink_ok: bool = False
) -> "DataCut":
Expand Down
55 changes: 52 additions & 3 deletions lhotse/cut/mixed.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,23 @@
from functools import partial, reduce
from io import BytesIO
from operator import add
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
from typing import (
Any,
Callable,
Dict,
Generator,
Iterable,
List,
Optional,
Tuple,
Union,
)

import numpy as np
import torch
from intervaltree import IntervalTree

from lhotse.array import Array, TemporalArray
from lhotse.audio import Recording, VideoInfo, get_audio_duration_mismatch_tolerance
from lhotse.audio.backend import save_audio
from lhotse.audio.mixer import AudioMixer, VideoMixer, audio_energy
Expand All @@ -27,6 +38,7 @@
FeatureMixer,
create_default_feature_extractor,
)
from lhotse.features.base import Features
from lhotse.features.io import FeaturesWriter
from lhotse.supervision import SupervisionSegment
from lhotse.utils import (
Expand Down Expand Up @@ -153,6 +165,10 @@ def has_recording(self) -> bool:
def has_video(self) -> bool:
return self._first_non_padding_cut.has_video

@property
def is_in_memory(self) -> bool:
    """Whether the cut of any track in ``self.tracks`` holds in-memory data."""
    return any(track.cut.is_in_memory for track in self.tracks)

def has(self, field: str) -> bool:
    """Report whether the first non-padding cut has ``field`` (delegates to its ``has``)."""
    return self._first_non_padding_cut.has(field)

Expand Down Expand Up @@ -191,6 +207,22 @@ def num_channels(self) -> Optional[int]:
def features_type(self) -> Optional[str]:
return self._first_non_padding_cut.features.type if self.has_features else None

def iter_data(
    self,
) -> Generator[
    Tuple[str, Union[Recording, Features, Array, TemporalArray]], None, None
]:
    """
    Iterate over the data pieces attached to the first non-padding cut of this mix.
    Returns a generator yielding tuples of ``(key, manifest)``, where
    ``key`` is the name of the attribute under which ``manifest`` is found.
    ``manifest`` is of type :class:`~lhotse.Recording`, :class:`~lhotse.Features`,
    :class:`~lhotse.TemporalArray`, or :class:`~lhotse.Array`.
    For example, if ``key`` is ``recording``, then ``manifest`` is the recording
    of the first non-padding cut.

    .. note:: Data attached to the remaining tracks is not visited here.
    """
    return self._first_non_padding_cut.iter_data()

def __getattr__(self, name: str) -> Any:
"""
This magic function is called when the user tries to access an attribute
Expand Down Expand Up @@ -1212,6 +1244,13 @@ def drop_alignments(self) -> "MixedCut":
tracks=[fastcopy(t, cut=t.cut.drop_alignments()) for t in self.tracks],
)

def drop_in_memory_data(self) -> "MixedCut":
    """
    Return a copy of the current :class:`MixedCut`, which doesn't contain any in-memory data.

    Every track is copied with its cut replaced by ``cut.drop_in_memory_data()``.
    """
    return fastcopy(
        self,
        tracks=[fastcopy(t, cut=t.cut.drop_in_memory_data()) for t in self.tracks],
    )

def compute_and_store_features(
self,
extractor: FeatureExtractor,
Expand Down Expand Up @@ -1540,9 +1579,19 @@ def with_recording_path_prefix(self, path: Pathlike) -> "MixedCut":
)

@property
def first_non_padding_cut(self) -> DataCut:
    """The cut held by the first non-padding track of this mix."""
    return self._first_non_padding_track.cut

@property
def first_non_padding_track(self) -> MixTrack:
    """
    The first track in ``self.tracks`` whose cut is not a :class:`PaddingCut`.

    Raises ``IndexError`` when no such track exists.
    """
    return [t for t in self.tracks if not isinstance(t.cut, PaddingCut)][0]

# Note: the private properties below are kept for backward compatibility.

@property
def _first_non_padding_cut(self) -> DataCut:
    """Private alias of :attr:`first_non_padding_cut`."""
    return self.first_non_padding_cut

@property
def _first_non_padding_track(self) -> MixTrack:
    """Private alias of :attr:`first_non_padding_track`."""
    return self.first_non_padding_track
18 changes: 15 additions & 3 deletions lhotse/cut/padding.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

import numpy as np
import torch
Expand Down Expand Up @@ -85,6 +85,10 @@ def has_video(self) -> bool:
def num_channels(self) -> int:
return 1

@property
def is_in_memory(self) -> bool:
    """Always ``False`` for this cut type."""
    return False

def has(self, field: str) -> bool:
if field == "recording":
return self.has_recording
Expand All @@ -99,6 +103,10 @@ def has(self, field: str) -> bool:
def recording_id(self) -> str:
return "PAD"

def iter_data(self) -> Iterable:
    """Return an empty iterable: this cut type yields no data pieces."""
    return ()

# noinspection PyUnusedLocal
def load_features(self, *args, **kwargs) -> Optional[np.ndarray]:
if self.has_features:
Expand Down Expand Up @@ -421,11 +429,15 @@ def drop_recording(self) -> "PaddingCut":
return fastcopy(self, num_samples=None)

def drop_supervisions(self) -> "PaddingCut":
    """No-op: returns this same object unchanged (not a copy)."""
    return self

def drop_alignments(self) -> "PaddingCut":
    """No-op: returns this same object unchanged (not a copy)."""
    return self

def drop_in_memory_data(self) -> "PaddingCut":
    """No-op: returns this same object unchanged (not a copy)."""
    return self

def compute_and_store_features(
Expand Down
12 changes: 12 additions & 0 deletions lhotse/cut/set.py
Original file line number Diff line number Diff line change
Expand Up @@ -1743,6 +1743,14 @@ def drop_alignments(self) -> "CutSet":
"""
return self.map(_drop_alignments)

def drop_in_memory_data(self) -> "CutSet":
    """
    Return a new :class:`.CutSet`, where each :class:`.Cut` is copied and detached from any in-memory data it held.
    The manifests for in-memory data are converted into placeholders that can still be looked up for
    metadata, but will fail on attempts to load the data.
    """
    # A module-level function (not a lambda) is passed to ``map``.
    return self.map(_drop_in_memory_data)

def compute_and_store_features(
self,
extractor: FeatureExtractor,
Expand Down Expand Up @@ -3355,6 +3363,10 @@ def _drop_supervisions(cut, *args, **kwargs):
return cut.drop_supervisions(*args, **kwargs)


def _drop_in_memory_data(cut, *args, **kwargs):
    """Call ``cut.drop_in_memory_data`` with the given arguments and return its result."""
    return cut.drop_in_memory_data(*args, **kwargs)


def _truncate_single(
cut: Cut,
max_duration: Seconds,
Expand Down
10 changes: 9 additions & 1 deletion lhotse/features/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from lhotse.audio import Recording
from lhotse.augmentation import AugmentFn
from lhotse.features.io import FeaturesWriter, get_reader
from lhotse.features.io import FeaturesWriter, get_reader, is_in_memory
from lhotse.lazy import AlgorithmMixin
from lhotse.serialization import LazyMixin, Serializable, load_yaml, save_to_yaml
from lhotse.utils import (
Expand Down Expand Up @@ -458,6 +458,14 @@ class Features:
def end(self) -> Seconds:
return self.start + self.duration

@property
def is_in_memory(self) -> bool:
    """Whether ``self.storage_type`` names an in-memory storage backend."""
    # Inside the method, ``is_in_memory`` resolves to the module-level helper
    # imported from lhotse.features.io, not to this property.
    return is_in_memory(self.storage_type)

@property
def is_placeholder(self) -> bool:
    """Whether this manifest is a placeholder, i.e. its ``storage_type`` is ``"shar"``."""
    return self.storage_type == "shar"

def load(
self,
start: Optional[Seconds] = None,
Expand Down
4 changes: 4 additions & 0 deletions lhotse/features/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1108,6 +1108,10 @@ def __exit__(self, exc_type, exc_val, exc_tb):
"""


def is_in_memory(storage_type: str) -> bool:
    """Return ``True`` if ``storage_type`` contains the substring ``"memory"``."""
    return "memory" in storage_type


def get_memory_writer(name: str):
assert "memory" in name
return get_writer(name)
Expand Down
Loading

0 comments on commit e3bed73

Please sign in to comment.