diff --git a/lhotse/lazy.py b/lhotse/lazy.py
index 3e0b988ee..b7526700d 100644
--- a/lhotse/lazy.py
+++ b/lhotse/lazy.py
@@ -182,10 +182,16 @@ def __init__(self, path: Pathlike) -> None:
         self._len = None
 
     def __iter__(self):
+        tot = 0
         with open_best(self.path, "r") as f:
             for line in f:
                 data = decode_json_line(line)
                 yield data
+                tot += 1
+        # Only reached once the file has been read to the end, so ``tot`` is
+        # the complete item count; an abandoned iteration caches nothing.
+        if self._len is None:
+            self._len = tot
 
     def __len__(self) -> int:
         if self._len is None:
diff --git a/test/test_lazy.py b/test/test_lazy.py
index 8cc43b07b..e6e18c262 100644
--- a/test/test_lazy.py
+++ b/test/test_lazy.py
@@ -9,6 +9,7 @@
 import pytest
 
 from lhotse import CutSet, FeatureSet, RecordingSet, SupervisionSet, combine
+from lhotse.lazy import LazyJsonlIterator
 from lhotse.testing.dummies import DummyManifest, as_lazy
 from lhotse.utils import fastcopy, is_module_available
 
@@ -248,3 +249,18 @@ def test_dillable():
         "dummy-mono-cut-0000-random-suffix",
         "dummy-mono-cut-0001-random-suffix",
     ]
+
+
+def test_lazy_jsonl_iterator_caches_len():
+    """A full pass over LazyJsonlIterator caches the item count in ``_len``."""
+    cuts = DummyManifest(CutSet, begin_id=0, end_id=200)
+    expected_len = 200
+    with as_lazy(cuts) as cuts_lazy:
+        path = cuts_lazy.data.path
+        it = LazyJsonlIterator(path)
+        assert it._len is None
+        for _ in it:
+            pass
+        assert it._len is not None
+        assert it._len == expected_len
+        assert len(it) == expected_len