diff --git a/python/python/lance/file.py b/python/python/lance/file.py
index e81b61d7b5a..30491a59923 100644
--- a/python/python/lance/file.py
+++ b/python/python/lance/file.py
@@ -169,6 +169,10 @@ def read_global_buffer(self, index: int) -> bytes:
         """
         return self._reader.read_global_buffer(index)
 
+    def num_rows(self) -> int:
+        """Return the number of rows belonging to the data file."""
+        return self._reader.num_rows()
+
 
 class LanceFileWriter:
     """
diff --git a/python/python/lance/lance/__init__.pyi b/python/python/lance/lance/__init__.pyi
index 7e5e14cc875..eaa080a3dd2 100644
--- a/python/python/lance/lance/__init__.pyi
+++ b/python/python/lance/lance/__init__.pyi
@@ -116,6 +116,7 @@ class LanceFileReader:
     def read_global_buffer(self, index: int) -> bytes: ...
     def metadata(self) -> LanceFileMetadata: ...
     def file_statistics(self) -> LanceFileStatistics: ...
+    def num_rows(self) -> int: ...
 
 class LanceBufferDescriptor:
     position: int
diff --git a/python/python/tests/test_file.py b/python/python/tests/test_file.py
index 53905a9dcd7..02788c27536 100644
--- a/python/python/tests/test_file.py
+++ b/python/python/tests/test_file.py
@@ -100,6 +100,17 @@ def test_take(tmp_path):
     assert table == pa.table({"a": [0, 77, 83]})
 
 
+def test_num_rows(tmp_path):
+    """num_rows() reports the row count of a freshly written file."""
+    path = tmp_path / "foo.lance"
+    schema = pa.schema([pa.field("a", pa.int64())])
+    with LanceFileWriter(str(path), schema) as writer:
+        writer.write_batch(pa.table({"a": list(range(100))}))
+
+    reader = LanceFileReader(str(path))
+    assert reader.num_rows() == 100
+
+
 def check_round_trip(tmp_path, table):
     path = tmp_path / "foo.lance"
     with LanceFileWriter(str(path), table.schema) as writer:
diff --git a/python/src/file.rs b/python/src/file.rs
index f9e494c1064..832a868ec5d 100644
--- a/python/src/file.rs
+++ b/python/src/file.rs
@@ -527,6 +527,10 @@ impl LanceFileReader {
             .infer_error()?;
         Ok(buffer_bytes.to_vec())
     }
+
+    pub fn num_rows(&mut self) -> u64 {
+        self.inner.num_rows()
+    }
 }
 
 #[cfg(test)]