Skip to content

Commit

Permalink
Flate2 zlib ng (#1365)
Browse files Browse the repository at this point in the history
* Use a different variable name than "bytes", as it shadows the built-in `bytes` type in Python.
  • Loading branch information
ritchie46 committed Sep 16, 2021
1 parent d668ecb commit 44f3918
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 13 deletions.
1 change: 1 addition & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ concat_str = ["polars-core/concat_str", "polars-lazy/concat_str"]
row_hash = ["polars-core/row_hash"]
reinterpret = ["polars-core/reinterpret", "polars-core/dtype-u64"]
decompress = ["polars-io/decompress"]
decompress-fast = ["polars-io/decompress-fast"]
mode = ["polars-core/mode", "polars-lazy/mode"]
take_opt_iter = ["polars-core/take_opt_iter"]
extract_jsonpath = ["polars-core/extract_jsonpath", "polars-core/strings"]
Expand Down
5 changes: 3 additions & 2 deletions polars/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ dtype-date64 = ["polars-core/dtype-date64", "polars-core/temporal"]
dtype-date32 = ["polars-core/dtype-date32"]
csv-file = ["csv-core", "memmap", "lexical", "arrow/io_csv"]
fmt = ["polars-core/plain_fmt"]
decompress = ["flate2"]
decompress = ["flate2/miniz_oxide"]
decompress-fast = ["flate2/zlib-ng-compat"]
# don't use this
private = []

Expand All @@ -44,7 +45,7 @@ ahash = "0.7"
num = "^0.4.0"
dirs = "3.0"
simdutf8 = {version="0.1", optional=true}
flate2 = {version = "1", optional=true}
flate2 = {version = "1", optional=true, default-features=false}

[package.metadata.docs.rs]
all-features = true
Expand Down
12 changes: 8 additions & 4 deletions polars/polars-io/src/csv_core/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,12 @@ impl<'a> CoreReader<'a> {

#[allow(clippy::too_many_arguments)]
pub fn build_csv_reader<'a>(
#[cfg(feature = "decompress")] mut reader_bytes: ReaderBytes<'a>,
#[cfg(not(feature = "decompress"))] reader_bytes: ReaderBytes<'a>,
#[cfg(any(feature = "decompress", feature = "decompress-fast"))] mut reader_bytes: ReaderBytes<
'a,
>,
#[cfg(not(any(feature = "decompress", feature = "decompress-fast")))] reader_bytes: ReaderBytes<
'a,
>,
n_rows: Option<usize>,
skip_rows: usize,
mut projection: Option<Vec<usize>>,
Expand Down Expand Up @@ -512,7 +516,7 @@ pub fn build_csv_reader<'a>(
let mut schema = match schema {
Some(schema) => Cow::Borrowed(schema),
None => {
#[cfg(feature = "decompress")]
#[cfg(any(feature = "decompress", feature = "decompress-fast"))]
{
// We keep track of the inferred schema bool
// In case the file is compressed this schema inference is wrong and has to be done
Expand All @@ -532,7 +536,7 @@ pub fn build_csv_reader<'a>(
)?;
Cow::Owned(inferred_schema)
}
#[cfg(not(feature = "decompress"))]
#[cfg(not(any(feature = "decompress", feature = "decompress-fast")))]
{
let (inferred_schema, _) = infer_file_schema(
&reader_bytes,
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-io/src/csv_core/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ pub fn infer_file_schema(
Ok((Schema::new(fields), records_count))
}

#[cfg(feature = "decompress")]
#[cfg(any(feature = "decompress", feature = "decompress-fast"))]
pub(crate) fn decompress(bytes: &[u8]) -> Option<Vec<u8>> {
// magic numbers
let gzip: [u8; 2] = [31, 139];
Expand Down
35 changes: 35 additions & 0 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ features = [
"concat_str",
"row_hash",
"reinterpret",
"decompress",
"decompress-fast",
"mode",
"extract_jsonpath",
"lazy_regex",
Expand Down
10 changes: 5 additions & 5 deletions py-polars/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ def test_compressed_csv():
with gzip.GzipFile(fileobj=fout, mode="w") as f:
f.write(csv.encode())

bytes = fout.getvalue()
out = pl.read_csv(bytes)
csv_bytes = fout.getvalue()
out = pl.read_csv(csv_bytes)
expected = pl.DataFrame(
{"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [1.0, 2.0, 3.0]}
)
Expand All @@ -162,13 +162,13 @@ def test_compressed_csv():
assert out.frame_equal(expected)

# now with column projection
out = pl.read_csv(bytes, columns=["a", "b"])
out = pl.read_csv(csv_bytes, columns=["a", "b"])
expected = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
assert out.frame_equal(expected)

# zlib compression
bytes = zlib.compress(csv.encode())
out = pl.read_csv(bytes)
csv_bytes = zlib.compress(csv.encode())
out = pl.read_csv(csv_bytes)
expected = pl.DataFrame(
{"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [1.0, 2.0, 3.0]}
)
Expand Down

0 comments on commit 44f3918

Please sign in to comment.