Skip to content

Commit

Permalink
fix(rust, python): Correct CSV row indexing (#5385)
Browse files Browse the repository at this point in the history
  • Loading branch information
owrior committed Oct 31, 2022
1 parent 55a58a3 commit 4dd178b
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
2 changes: 1 addition & 1 deletion polars/polars-io/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ pub(crate) fn update_row_counts(dfs: &mut [(DataFrame, IdxSize)], offset: IdxSiz
if let Some(s) = df.get_columns_mut().get_mut(0) {
*s = &*s + previous;
}
- previous = *n_read;
+ previous += *n_read;
}
}
}
Expand Down
9 changes: 9 additions & 0 deletions py-polars/tests/slow/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,12 @@ def test_csv_scan_categorical() -> None:
"/tmp/test_csv_scan_categorical.csv", dtypes={"x": pl.Categorical}
).collect()
assert df["x"].dtype == pl.Categorical


def test_read_csv_chunked() -> None:
    """Verify the generated row-count column never decreases on a 10,000-line CSV."""
    lines = ["1"] * 10_000
    source = io.StringIO("\n".join(lines))
    frame = pl.read_csv(source, row_count_name="count")

    # Compare every count with the one on the preceding row; a row whose
    # count is lower than its predecessor's breaks the increasing order,
    # so the filtered frame must contain no rows.
    current = pl.col("count")
    preceding = current.shift(1)
    out_of_order = frame.filter(current < preceding)
    assert out_of_order.is_empty()

0 comments on commit 4dd178b

Please sign in to comment.