Skip to content

Commit

Permalink
csv parsing: skip whitespace on failed parse (#2953)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 23, 2022
1 parent ad2f32a commit af61de0
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 4 deletions.
19 changes: 15 additions & 4 deletions polars/polars-io/src/csv_core/buffer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::csv::CsvEncoding;
use crate::csv_core::csv::RunningSize;
use crate::csv_core::parser::{is_whitespace, skip_whitespace};
use crate::csv_core::utils::escape_field;
use arrow::array::Utf8Array;
use arrow::bitmap::MutableBitmap;
Expand Down Expand Up @@ -85,10 +86,20 @@ where
// it's faster to work on options.
// if we need to throw an error, we parse again to be able to throw the error

match (T::parse(bytes), ignore_errors) {
(Some(value), _) => self.append_value(value),
(None, true) => self.append_null(),
(None, _) => return Err(PolarsError::ComputeError("".into())),
match T::parse(bytes) {
Some(value) => self.append_value(value),
None => {
// try again without whitespace
if is_whitespace(bytes[0]) {
let bytes = skip_whitespace(bytes);
return self.parse_bytes(bytes, ignore_errors, needs_escaping);
}
if ignore_errors {
self.append_null()
} else {
return Err(PolarsError::ComputeError("".into()));
}
}
};
}
Ok(())
Expand Down
15 changes: 15 additions & 0 deletions polars/tests/it/io/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -983,3 +983,18 @@ fn test_parse_dates() -> Result<()> {
assert_eq!(out.column("date")?.null_count(), 1);
Ok(())
}

/// Reads a two-column CSV whose second field is preceded by a space
/// (`12, 1435`) and checks that the resulting frame equals one built from
/// the `i64` columns `a = [12]` and `b = [1435]`.
#[test]
fn test_whitespace_skipping() -> Result<()> {
    // Header row followed by a single data row; note the space before `1435`.
    let raw = "a,b
12, 1435";

    let parsed = CsvReader::new(Cursor::new(raw)).finish()?;

    let want = df![
        "a" => [12i64],
        "b" => [1435i64],
    ]?;

    assert!(parsed.frame_equal(&want));
    Ok(())
}

0 comments on commit af61de0

Please sign in to comment.