fix(rust, python): Update read_csv error message (#6082)
stinodego committed Jan 6, 2023
1 parent c129710 commit 478d3eb
Showing 11 changed files with 45 additions and 46 deletions.
35 changes: 17 additions & 18 deletions polars/polars-io/src/csv/parser.rs
@@ -451,7 +451,7 @@ pub(super) fn parse_lines<'a>(
null_values: Option<&NullValuesCompiled>,
projection: &[usize],
buffers: &mut [Buffer<'a>],
- ignore_parser_errors: bool,
+ ignore_errors: bool,
n_lines: usize,
// length or original schema
schema_len: usize,
@@ -547,28 +547,27 @@ pub(super) fn parse_lines<'a>(
if add_null {
    buf.add_null()
} else {
-   buf.add(field, ignore_parser_errors, needs_escaping)
-       .map_err(|_| {
-           let bytes_offset = offset + field.as_ptr() as usize - start;
-           let unparsable = String::from_utf8_lossy(field);
-           PolarsError::ComputeError(
-               format!(
-                   "Could not parse `{}` as dtype {:?} at column {}.\n\
-                   The current offset in the file is {} bytes.\n\
-                   \n\
-                   Consider specifying the correct dtype, increasing\n\
-                   the number of records used to infer the schema,\n\
-                   running the parser with `ignore_parser_errors=true`\n\
-                   or adding `{}` to the `null_values` list.",
-                   &unparsable,
-                   buf.dtype(),
-                   idx,
-                   bytes_offset,
-                   &unparsable,
-               )
-               .into(),
-           )
-       })?;
+   buf.add(field, ignore_errors, needs_escaping).map_err(|_| {
+       let bytes_offset = offset + field.as_ptr() as usize - start;
+       let unparsable = String::from_utf8_lossy(field);
+       PolarsError::ComputeError(
+           format!(
+               "Could not parse `{}` as dtype {:?} at column {}.\n\
+               The current offset in the file is {} bytes.\n\
+               \n\
+               Consider specifying the correct dtype, increasing\n\
+               the number of records used to infer the schema,\n\
+               enabling the `ignore_errors` flag, or adding\n\
+               `{}` to the `null_values` list.",
+               &unparsable,
+               buf.dtype(),
+               idx,
+               bytes_offset,
+               &unparsable,
+           )
+           .into(),
+       )
+   })?;
}

processed_fields += 1;
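
In practice this hunk only changes the guidance inside the `ComputeError` message: it now recommends the renamed `ignore_errors` flag instead of the removed `ignore_parser_errors=true` wording. A rough sketch of how that error surfaces to a caller, assuming the `polars` crate with its `csv` feature; the sample data, column names, and the `Cursor`-backed input are illustrative assumptions, not part of this commit:

```rust
use std::io::Cursor;

use polars::prelude::*;

fn main() {
    // "score" is inferred as an integer from the first data row, so the
    // second row cannot be parsed and `finish` returns a ComputeError
    // whose message now points at the `ignore_errors` flag.
    let csv = b"name,score\nalice,10\nbob,oops\n";
    let result = CsvReader::new(Cursor::new(csv.as_slice()))
        .infer_schema(Some(1))
        .has_header(true)
        .finish();
    match result {
        Ok(df) => println!("{df}"),
        Err(PolarsError::ComputeError(msg)) => eprintln!("CSV parse failed: {}", msg),
        Err(other) => eprintln!("unexpected error: {}", other),
    }
}
```
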
10 changes: 5 additions & 5 deletions polars/polars-io/src/csv/read.rs
@@ -110,7 +110,7 @@ where
columns: Option<Vec<String>>,
delimiter: Option<u8>,
has_header: bool,
- ignore_parser_errors: bool,
+ ignore_errors: bool,
pub(crate) schema: Option<&'a Schema>,
encoding: CsvEncoding,
n_threads: Option<usize>,
@@ -168,8 +168,8 @@ where
}

/// Continue with next batch when a ParserError is encountered.
- pub fn with_ignore_parser_errors(mut self, ignore: bool) -> Self {
-     self.ignore_parser_errors = ignore;
+ pub fn with_ignore_errors(mut self, ignore: bool) -> Self {
+     self.ignore_errors = ignore;
self
}

@@ -339,7 +339,7 @@ impl<'a, R: MmapBytesReader + 'a> CsvReader<'a, R> {
self.max_records,
self.delimiter,
self.has_header,
- self.ignore_parser_errors,
+ self.ignore_errors,
self.schema,
std::mem::take(&mut self.columns),
self.encoding,
@@ -462,7 +462,7 @@ where
projection: None,
delimiter: None,
has_header: true,
- ignore_parser_errors: false,
+ ignore_errors: false,
schema: None,
columns: None,
encoding: CsvEncoding::Utf8,
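
For users of the eager reader, the visible effect of this file is the renamed builder method. A minimal usage sketch, assuming the `polars` crate with its `csv` feature (the file name is hypothetical):

```rust
use polars::prelude::*;

fn read_messy_csv() -> PolarsResult<DataFrame> {
    // Fields that cannot be parsed into the inferred dtype become nulls
    // instead of aborting the whole read.
    CsvReader::from_path("data.csv")? // hypothetical path
        .infer_schema(Some(100))
        .has_header(true)
        .with_ignore_errors(true) // was: with_ignore_parser_errors(true)
        .finish()
}
```
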
6 changes: 3 additions & 3 deletions polars/polars-io/src/csv/read_impl/batched.rs
@@ -44,7 +44,7 @@ impl<'a> CoreReader<'a> {
eol_char: self.eol_char,
null_values: self.null_values,
to_cast: self.to_cast,
- ignore_parser_errors: self.ignore_parser_errors,
+ ignore_errors: self.ignore_errors,
n_rows: self.n_rows,
encoding: self.encoding,
delimiter: self.delimiter,
@@ -70,7 +70,7 @@ pub struct BatchedCsvReader<'a> {
eol_char: u8,
null_values: Option<NullValuesCompiled>,
to_cast: Vec<Field>,
- ignore_parser_errors: bool,
+ ignore_errors: bool,
n_rows: Option<usize>,
encoding: CsvEncoding,
delimiter: u8,
@@ -110,7 +110,7 @@ impl<'a> BatchedCsvReader<'a> {
bytes,
self.delimiter,
self.schema.as_ref(),
- self.ignore_parser_errors,
+ self.ignore_errors,
&self.projection,
bytes_offset_thread,
self.quote_char,
22 changes: 11 additions & 11 deletions polars/polars-io/src/csv/read_impl/mod.rs
@@ -81,7 +81,7 @@ pub(crate) struct CoreReader<'a> {
projection: Option<Vec<usize>>,
/// Current line number, used in error reporting
line_number: usize,
- ignore_parser_errors: bool,
+ ignore_errors: bool,
skip_rows_before_header: usize,
// after the header, we need to take embedded lines into account
skip_rows_after_header: usize,
@@ -170,7 +170,7 @@ impl<'a> CoreReader<'a> {
max_records: Option<usize>,
delimiter: Option<u8>,
has_header: bool,
- ignore_parser_errors: bool,
+ ignore_errors: bool,
schema: Option<&'a Schema>,
columns: Option<Vec<String>>,
encoding: CsvEncoding,
@@ -264,7 +264,7 @@ impl<'a> CoreReader<'a> {
schema,
projection,
line_number: usize::from(has_header),
- ignore_parser_errors,
+ ignore_errors,
skip_rows_before_header: skip_rows,
skip_rows_after_header,
n_rows,
@@ -508,7 +508,7 @@ impl<'a> CoreReader<'a> {
&self.init_string_size_stats(&str_columns, 0),
self.quote_char,
self.encoding,
- self.ignore_parser_errors,
+ self.ignore_errors,
)?;
let df = DataFrame::new_no_checks(
buffers
@@ -530,7 +530,7 @@ impl<'a> CoreReader<'a> {
.map(|(bytes_offset_thread, stop_at_nbytes)| {
let delimiter = self.delimiter;
let schema = self.schema.as_ref();
- let ignore_parser_errors = self.ignore_parser_errors;
+ let ignore_errors = self.ignore_errors;
let projection = &projection;

let mut read = bytes_offset_thread;
@@ -549,7 +549,7 @@ impl<'a> CoreReader<'a> {
&str_capacities,
self.quote_char,
self.encoding,
- self.ignore_parser_errors,
+ self.ignore_errors,
)?;

let local_bytes = &bytes[read..stop_at_nbytes];
@@ -566,7 +566,7 @@ impl<'a> CoreReader<'a> {
self.null_values.as_ref(),
projection,
&mut buffers,
- ignore_parser_errors,
+ ignore_errors,
chunk_size,
self.schema.len(),
)?;
@@ -624,7 +624,7 @@ impl<'a> CoreReader<'a> {
bytes,
self.delimiter,
self.schema.as_ref(),
- self.ignore_parser_errors,
+ self.ignore_errors,
&projection,
bytes_offset_thread,
self.quote_char,
@@ -701,7 +701,7 @@ fn read_chunk(
bytes: &[u8],
delimiter: u8,
schema: &Schema,
- ignore_parser_errors: bool,
+ ignore_errors: bool,
projection: &[usize],
bytes_offset_thread: usize,
quote_char: Option<u8>,
@@ -723,7 +723,7 @@ fn read_chunk(
str_capacities,
quote_char,
encoding,
- ignore_parser_errors,
+ ignore_errors,
)?;

let mut last_read = usize::MAX;
@@ -745,7 +745,7 @@ fn read_chunk(
null_values,
projection,
&mut buffers,
- ignore_parser_errors,
+ ignore_errors,
chunk_size,
schema.len(),
)?;
@@ -51,7 +51,7 @@ impl CsvSource {
.has_header(options.has_header)
.with_schema(schema_ref)
.with_delimiter(options.delimiter)
- .with_ignore_parser_errors(options.ignore_errors)
+ .with_ignore_errors(options.ignore_errors)
.with_skip_rows(options.skip_rows)
.with_n_rows(n_rows)
.with_columns(with_columns.map(|mut cols| std::mem::take(Arc::make_mut(&mut cols))))
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/frame/csv.rs
@@ -92,7 +92,7 @@ impl<'a> LazyCsvReader<'a> {

/// Continue with next batch when a ParserError is encountered.
#[must_use]
- pub fn with_ignore_parser_errors(mut self, ignore: bool) -> Self {
+ pub fn with_ignore_errors(mut self, ignore: bool) -> Self {
self.ignore_errors = ignore;
self
}
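
The lazy API gets the same rename. A sketch of the lazy counterpart, assuming the `polars` crate with its lazy CSV scanning features enabled (the path is hypothetical):

```rust
use polars::prelude::*;

fn scan_messy_csv() -> PolarsResult<DataFrame> {
    LazyCsvReader::new("data.csv") // hypothetical path
        .has_header(true)
        .with_ignore_errors(true) // was: with_ignore_parser_errors(true)
        .finish()? // builds the LazyFrame
        .collect() // executes the scan
}
```
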
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/physical_plan/executors/scan/csv.rs
@@ -30,7 +30,7 @@ impl CsvExec {
.has_header(self.options.has_header)
.with_schema(&self.schema)
.with_delimiter(self.options.delimiter)
- .with_ignore_parser_errors(self.options.ignore_errors)
+ .with_ignore_errors(self.options.ignore_errors)
.with_skip_rows(self.options.skip_rows)
.with_n_rows(n_rows)
.with_columns(with_columns.map(|mut cols| std::mem::take(Arc::make_mut(&mut cols))))
6 changes: 3 additions & 3 deletions polars/tests/it/io/csv.rs
@@ -55,7 +55,7 @@ fn test_parser() -> PolarsResult<()> {
CsvReader::new(file)
.infer_schema(Some(100))
.has_header(true)
- .with_ignore_parser_errors(true)
+ .with_ignore_errors(true)
.finish()
.unwrap();

@@ -72,7 +72,7 @@ fn test_parser() -> PolarsResult<()> {
// we also check if infer schema ignores errors
.infer_schema(Some(10))
.has_header(true)
- .with_ignore_parser_errors(true)
+ .with_ignore_errors(true)
.finish()
.unwrap();

@@ -146,7 +146,7 @@ fn test_tab_sep() {
.infer_schema(Some(100))
.with_delimiter(b'\t')
.has_header(false)
- .with_ignore_parser_errors(true)
+ .with_ignore_errors(true)
.finish()
.unwrap();
assert_eq!(df.shape(), (8, 26))
2 changes: 1 addition & 1 deletion py-polars/src/batched_csv.rs
@@ -85,7 +85,7 @@ impl PyBatchedCsv {
.with_n_rows(n_rows)
.with_delimiter(sep.as_bytes()[0])
.with_skip_rows(skip_rows)
- .with_ignore_parser_errors(ignore_errors)
+ .with_ignore_errors(ignore_errors)
.with_projection(projection)
.with_rechunk(rechunk)
.with_chunk_size(chunk_size)
2 changes: 1 addition & 1 deletion py-polars/src/dataframe.rs
@@ -196,7 +196,7 @@ impl PyDataFrame {
.with_n_rows(n_rows)
.with_delimiter(sep.as_bytes()[0])
.with_skip_rows(skip_rows)
- .with_ignore_parser_errors(ignore_errors)
+ .with_ignore_errors(ignore_errors)
.with_projection(projection)
.with_rechunk(rechunk)
.with_chunk_size(chunk_size)
2 changes: 1 addition & 1 deletion py-polars/src/lazy/dataframe.rs
@@ -221,7 +221,7 @@ impl PyLazyFrame {
.with_infer_schema_length(infer_schema_length)
.with_delimiter(delimiter)
.has_header(has_header)
- .with_ignore_parser_errors(ignore_errors)
+ .with_ignore_errors(ignore_errors)
.with_skip_rows(skip_rows)
.with_n_rows(n_rows)
.with_cache(cache)
