Skip to content

Commit

Permalink
chore[rust]: update arrow (#4461)
Browse files — browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 17, 2022
1 parent 15e8036 commit d74e010
Show file tree
Hide file tree
Showing 10 changed files with 39 additions and 38 deletions.
2 changes: 1 addition & 1 deletion examples/read_csv/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ write_output = ["polars/ipc", "polars/parquet"]
default = ["write_output"]

[dependencies]
polars = { path = "../../polars", features = ["lazy", "csv-file"] }
polars = { path = "../../polars", features = ["lazy", "csv-file", "ipc"] }
7 changes: 6 additions & 1 deletion polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,12 @@ dtype-categorical = [
"polars-lazy/dtype-categorical",
"polars-ops/dtype-categorical",
]
dtype-struct = ["polars-core/dtype-struct", "polars-lazy/dtype-struct", "polars-ops/dtype-struct"]
dtype-struct = [
"polars-core/dtype-struct",
"polars-lazy/dtype-struct",
"polars-ops/dtype-struct",
"polars-io/dtype-struct",
]

docs-selection = [
"csv-file",
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ description = "Arrow interfaces for Polars DataFrame library"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "6abd0c4164676f7b17865c8def875401b5bbd5fc", features = ["compute_concatenate"], default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "0b345ae310e99729b2b48c56a2cb0a99027d1ac2", features = ["compute_concatenate"], default-features = false }
# arrow = { package = "arrow2", path = "../../../arrow2", features = ["compute_concatenate"], default-features = false }
# arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "ipc_meta", features = ["compute_concatenate"], default-features = false }
# arrow = { package = "arrow2", version = "0.12", default-features = false, features = ["compute_concatenate"] }
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ thiserror = "^1.0"
package = "arrow2"
git = "https://github.com/jorgecarleitao/arrow2"
# git = "https://github.com/ritchie46/arrow2"
rev = "6abd0c4164676f7b17865c8def875401b5bbd5fc"
rev = "0b345ae310e99729b2b48c56a2cb0a99027d1ac2"
# path = "../../../arrow2"
# branch = "ipc_meta"
# version = "0.12"
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dtype-categorical = ["polars-core/dtype-categorical"]
dtype-date = ["polars-core/dtype-date", "polars-time/dtype-date"]
dtype-datetime = ["polars-core/dtype-datetime", "polars-core/temporal", "polars-time/dtype-datetime"]
dtype-time = ["polars-core/dtype-time", "polars-core/temporal", "polars-time/dtype-time"]
dtype-struct = ["polars-core/dtype-struct"]
fmt = ["polars-core/fmt"]
lazy = []
parquet = ["polars-core/parquet", "arrow/io_parquet", "arrow/io_parquet_compression", "memmap"]
Expand All @@ -37,7 +38,7 @@ private = ["polars-time/private"]
[dependencies]
ahash = "0.7"
anyhow = "1.0"
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "6abd0c4164676f7b17865c8def875401b5bbd5fc", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "0b345ae310e99729b2b48c56a2cb0a99027d1ac2", default-features = false }
# arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "ipc_meta", default-features = false }
# arrow = { package = "arrow2", version = "0.12", default-features = false }
# arrow = { package = "arrow2", path = "../../../arrow2", default-features = false }
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-io/src/ndjson_core/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,6 @@ fn value_to_dtype(val: &Value) -> DataType {
Value::Static(StaticNode::U64(_)) => DataType::UInt64,
Value::Static(StaticNode::F64(_)) => DataType::Float64,
Value::Static(StaticNode::Null) => DataType::Null,
Value::String(_) => DataType::Utf8,
Value::Array(arr) => {
let dtype = value_to_dtype(&arr[0]);

Expand Down Expand Up @@ -270,7 +269,7 @@ fn deserialize_all<'a, 'b>(json: &'b Value) -> AnyValue<'a> {
#[cfg(feature = "dtype-struct")]
Value::Object(doc) => {
let vals: (Vec<AnyValue>, Vec<Field>) = doc
.into_iter()
.iter()
.map(|(key, value)| {
let dt = value_to_dtype(value);
let fld = Field::new(key, dt);
Expand All @@ -280,6 +279,7 @@ fn deserialize_all<'a, 'b>(json: &'b Value) -> AnyValue<'a> {
.unzip();
AnyValue::StructOwned(Box::new(vals))
}
#[cfg(not(feature = "dtype-struct"))]
val => AnyValue::Utf8Owned(format!("{:#?}", val)),
}
}
1 change: 1 addition & 0 deletions polars/polars-io/src/parquet/mmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ pub(super) fn to_deserializer<'a>(
column_meta,
std::sync::Arc::new(|_, _| true),
vec![],
usize::MAX,
);
(
BasicDecompressor::new(pages, vec![]),
Expand Down
28 changes: 17 additions & 11 deletions polars/polars-io/src/parquet/predicates.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use arrow::array::Array;
use arrow::array::{Array, UInt64Array};
use arrow::compute::concatenate::concatenate;
use arrow::io::parquet::read::statistics::{self, deserialize, Statistics};
use arrow::io::parquet::read::statistics::{deserialize, Statistics};
use arrow::io::parquet::read::RowGroupMetaData;
use polars_core::prelude::*;

Expand All @@ -21,15 +21,21 @@ impl ColumnStats {
}

pub fn null_count(&self) -> Option<usize> {
match &self.0.null_count {
statistics::Count::Single(arr) => {
if arr.is_valid(0) {
Some(arr.value(0) as usize)
} else {
None
}
}
_ => None,
match self.1.data_type() {
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => None,
_ => self
.0
.null_count
.as_any()
.downcast_ref::<UInt64Array>()
.and_then(|arr| {
if arr.is_valid(0) {
Some(arr.value(0) as usize)
} else {
None
}
}),
}
}

Expand Down
25 changes: 7 additions & 18 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,9 +583,8 @@ def test_file_buffer() -> None:
f.write(b"1,2,3,4,5,6\n7,8,9,10,11,12")
f.seek(0)
# check if not fails on TryClone and Length impl in file.rs
with pytest.raises(pl.ArrowError) as e:
with pytest.raises(pl.ArrowError):
pl.read_parquet(f)
assert "Invalid Parquet file" in str(e.value)


def test_read_missing_file() -> None:
Expand Down

0 comments on commit d74e010

Please sign in to comment.