Skip to content

Commit

Permalink
csv: improve float inference regex
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 8, 2022
1 parent 7788875 commit 07b350e
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
9 changes: 9 additions & 0 deletions polars/polars-io/src/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1396,4 +1396,13 @@ A3,\"B4_\"\"with_embedded_double_quotes\"\"\",C4,4";
assert_eq!(df.dtypes(), expected);
Ok(())
}

/// Reads a tab-delimited input whose second column holds comma-joined
/// decimals (e.g. `2.3,2.4`) and checks the inferred schema: the first
/// column must come out as `Int64` and the second as `Utf8`, i.e. a value
/// that merely contains float-looking pieces is not treated as a float.
#[test]
fn test_comma_separated_field_in_tsv() -> Result<()> {
    let tsv = "first\tsecond\n1\t2.3,2.4\n3\t4.5,4.6\n";

    // Tab is the field delimiter here, so the commas stay inside the field.
    let frame = CsvReader::new(Cursor::new(tsv))
        .with_delimiter(b'\t')
        .finish()?;

    let expected = [DataType::Int64, DataType::Utf8];
    assert_eq!(frame.dtypes(), &expected);
    Ok(())
}
}
3 changes: 2 additions & 1 deletion polars/polars-io/src/csv_core/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ pub fn get_reader_bytes<R: Read + MmapBytesReader>(reader: &mut R) -> Result<Rea

lazy_static! {
static ref FLOAT_RE: Regex =
Regex::new(r"^\s*-?((\d*\.\d+)[eE]?[-\+]?\d*)|inf|NaN|\d+[eE][-+]\d+$").unwrap();
Regex::new(r"^(\s*-?((\d*\.\d+)[eE]?[-\+]?\d*)|[-+]?inf|[-+]?NaN|\d+[eE][-+]\d+)$")
.unwrap();
static ref INTEGER_RE: Regex = Regex::new(r"^\s*-?(\d+)$").unwrap();
static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^\s*(true)$|^(false)$")
.case_insensitive(true)
Expand Down

0 comments on commit 07b350e

Please sign in to comment.