Skip to content

Commit

Permalink
feat(rust, python): Support parsing more float string representations.
Browse files Browse the repository at this point in the history
  • Loading branch information
ghuls committed Dec 16, 2022
1 parent 9d31b49 commit 3187f57
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 2 deletions.
3 changes: 1 addition & 2 deletions polars/polars-io/src/csv/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,7 @@ pub fn get_reader_bytes<R: Read + MmapBytesReader + ?Sized>(
}

// Lazily compiled pattern for text that looks like a floating-point number.
// NOTE: this span is shown as a diff hunk, so the pattern removed by the commit
// and the pattern that replaces it both appear inside the closure below.
static FLOAT_RE: Lazy<Regex> = Lazy::new(|| {
// Removed by this commit (the 2 deleted lines). In this pattern leading
// whitespace and a sign were only handled for the decimal alternative, and
// that sign could only be '-'; the exponent marker, its sign and its digits
// were each optional on their own; and the integer-mantissa form required an
// explicit exponent sign.
Regex::new(r"^(\s*-?((\d*\.\d+)[eE]?[-\+]?\d*)|[-+]?inf|[-+]?NaN|[-+]?\d+[eE][-+]\d+)$")
.unwrap()
// Added by this commit (the 1 added line). Optional leading whitespace and an
// optional '+' or '-' now apply to every alternative:
//   * decimal digits around a '.', optionally followed by a complete exponent,
//   * the literals `inf` and `NaN`,
//   * integer digits followed by an exponent whose sign is optional,
//   * integer digits followed by a trailing '.'.
Regex::new(r"^\s*[-+]?((\d*\.\d+)([eE][-+]?\d+)?|inf|NaN|(\d+)[eE][-+]?\d+|\d+\.)$").unwrap()
});

// Lazily compiled pattern for integer-looking text: optional leading
// whitespace, an optional '-', then one or more digits (captured). A leading
// '+' is not accepted by this pattern.
static INTEGER_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\s*-?(\d+)$").unwrap());
Expand Down
31 changes: 31 additions & 0 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,37 @@ def test_csv_null_values() -> None:
assert df.rows() == [(None, "b", "c"), ("a", None, "c")]


def test_csv_float_parsing() -> None:
    """Check that header-less CSV rows of float spellings are read as ``Float64``.

    Each row is parsed on its own, so a single unrecognised spelling fails the
    test for that row instead of being hidden by its neighbours.
    """
    # Plain decimals, bare leading/trailing dots, and the special values,
    # each unsigned, with '+', and with '-'.
    plain_float_rows = (
        "123.86,+123.86,-123.86\n",
        ".987,+.987,-.987\n",
        "5.,+5.,-5.\n",
        "inf,+inf,-inf\n",
        "NaN,+NaN,-NaN\n",
    )

    # Scientific notation with every combination of mantissa sign, exponent
    # marker case, and exponent sign exercised by the original test.
    scientific_rows = (
        "1e27,1E65,1e-28,1E-9\n",
        "+1e27,+1E65,+1e-28,+1E-9\n",
        "1e+27,1E+65,1e-28,1E-9\n",
        "+1e+27,+1E+65,+1e-28,+1E-9\n",
        "-1e+27,-1E+65,-1e-28,-1E-9\n",
        # Disabled rows: exponent-only spellings with no mantissa digits.
        # "e27,E65,e-28,E-9\n",
        # "+e27,+E65,+e-28,+E-9\n",
        # "-e27,-E65,-e-28,-E-9\n",
    )

    row_groups = (
        (plain_float_rows, ("a", "b", "c")),
        (scientific_rows, ("a", "b", "c", "d")),
    )

    for csv_rows, column_names in row_groups:
        expected_dtypes = [pl.Float64] * len(column_names)
        for csv_row in csv_rows:
            frame = pl.read_csv(
                io.StringIO(csv_row),
                has_header=False,
                # Fresh list per call, as in the original test.
                new_columns=list(column_names),
            )
            assert frame.dtypes == expected_dtypes


def test_datetime_parsing() -> None:
csv = textwrap.dedent(
"""\
Expand Down

0 comments on commit 3187f57

Please sign in to comment.