From 8d321fe8ad4c288b72edc7e8d082fcd6ec304a32 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Fri, 15 Mar 2024 13:52:57 -0400 Subject: [PATCH] also use polars count with util::count_rows() helper if polars feature is enabled --- src/cmd/count.rs | 2 +- src/util.rs | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/cmd/count.rs b/src/cmd/count.rs index ade9d3c3a..6badceba5 100644 --- a/src/cmd/count.rs +++ b/src/cmd/count.rs @@ -155,7 +155,7 @@ fn count_input( } #[cfg(feature = "polars")] -fn polars_count_input( +pub fn polars_count_input( conf: &Config, low_memory: bool, ) -> Result<(u64, usize), crate::clitypes::CliError> { diff --git a/src/util.rs b/src/util.rs index 302cd3dc3..f63fc2ade 100644 --- a/src/util.rs +++ b/src/util.rs @@ -21,6 +21,8 @@ use serde::de::DeserializeOwned; use serde::de::{Deserialize, Deserializer, Error}; use sysinfo::System; +#[cfg(feature = "polars")] +use crate::cmd::count::polars_count_input; use crate::{ config, config::{Config, Delimiter, DEFAULT_WTR_BUFFER_CAPACITY}, @@ -293,10 +295,32 @@ pub fn count_rows(conf: &Config) -> Result { Ok(idx.count()) } else { // index does not exist or is stale, - // count records by iterating through records + // count records by using polars mem-mapped reader if available + // otherwise, count records by iterating through records // Do this only once per invocation and cache the result in ROW_COUNT, // so we don't have to re-count rows every time we need to know the // rowcount for CSVs that don't have an index. 
+ #[cfg(feature = "polars")] + let count_opt = ROW_COUNT.get_or_init(|| { + if let Ok((count, _)) = polars_count_input(conf, false) { + Some(count) + } else { + // if polars_count_input fails, fall back to regular CSV reader + if let Ok(mut rdr) = conf.reader() { + let mut count = 0_u64; + let mut _record = csv::ByteRecord::new(); + #[allow(clippy::used_underscore_binding)] + while rdr.read_byte_record(&mut _record).unwrap_or_default() { + count += 1; + } + Some(count) + } else { + None + } + } + }); + + #[cfg(not(feature = "polars"))] let count_opt = ROW_COUNT.get_or_init(|| { if let Ok(mut rdr) = conf.reader() { let mut count = 0_u64;