Skip to content

Commit

Permalink
update arrow
Browse files: browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 8, 2021
1 parent 8b9c556 commit ad92f1e
Show file tree
Hide file tree
Showing 6 changed files with 23 additions and 23 deletions.
2 changes: 1 addition & 1 deletion polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ description = "Arrow interfaces for Polars DataFrame library"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "f146097350512aa28a2bf54181c49361b97a8053", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "6e9ea352b470556e08600274ba2add6106cf6e26", default-features = false }
#arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch="dev", default-features = false }
#arrow = { package = "arrow2", version = "0.7", default-features=false}
thiserror = "^1.0"
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ docs-selection = [
]

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "f146097350512aa28a2bf54181c49361b97a8053", default-features = false, features=["compute"] }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "6e9ea352b470556e08600274ba2add6106cf6e26", default-features = false, features=["compute"] }
#arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", default-features = false, features=["compute"], branch="dev" }
#arrow = { package = "arrow2", version="0.7", default-features = false, features=["compute"]}
polars-arrow = {version = "0.17.0", path = "../polars-arrow"}
Expand Down
17 changes: 6 additions & 11 deletions polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::chunked_array::object::PolarsObjectSafe;
use crate::prelude::*;
use ahash::RandomState;
use arrow::compute::comparison::Simd8;
use arrow::datatypes::IntegerType;
pub use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
use arrow::types::simd::Simd;
use arrow::types::NativeType;
Expand Down Expand Up @@ -663,7 +664,7 @@ impl Schema {
DataType::Categorical => ArrowField::new(
f.name(),
ArrowDataType::Dictionary(
Box::new(ArrowDataType::UInt32),
IntegerType::UInt32,
Box::new(ArrowDataType::LargeUtf8),
),
true,
Expand Down Expand Up @@ -818,29 +819,23 @@ mod test {
DataType::List(DataType::Float64.into()),
),
(
ArrowDataType::Dictionary(ArrowDataType::UInt32.into(), ArrowDataType::Utf8.into()),
ArrowDataType::Dictionary(IntegerType::UInt32, ArrowDataType::Utf8.into()),
DataType::Categorical,
),
(
ArrowDataType::Dictionary(
ArrowDataType::UInt32.into(),
ArrowDataType::LargeUtf8.into(),
),
ArrowDataType::Dictionary(IntegerType::UInt32, ArrowDataType::LargeUtf8.into()),
DataType::Categorical,
),
(
ArrowDataType::Dictionary(
ArrowDataType::UInt64.into(),
ArrowDataType::LargeUtf8.into(),
),
ArrowDataType::Dictionary(IntegerType::UInt64, ArrowDataType::LargeUtf8.into()),
DataType::Categorical,
),
];

for (dt_a, dt_p) in dtypes {
let dt: DataType = (&dt_a).into();

assert_eq!(dt_p, dt_a);
assert_eq!(dt_p, dt);
}
}
}
21 changes: 13 additions & 8 deletions polars/polars-core/src/series/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,14 +225,19 @@ impl std::convert::TryFrom<(&str, Vec<ArrayRef>)> for Series {
#[cfg(not(feature = "dtype-i8"))]
Ok(UInt32Chunked::full_null(name, len).into_series())
}
#[cfg(not(feature = "dtype-categorical"))]
ArrowDataType::Dictionary(_, _) => {
panic!("activate dtype-categorical to convert dictionary arrays")
}
#[cfg(feature = "dtype-categorical")]
ArrowDataType::Dictionary(key_type, value_type) => {
use crate::chunked_array::categorical::CategoricalChunkedBuilder;
use arrow::datatypes::IntegerType;
let chunks = chunks.iter().map(|arr| &**arr).collect::<Vec<_>>();
let arr = arrow::compute::concat::concatenate(&chunks)?;

let (keys, values) = match (&**key_type, &**value_type) {
(ArrowDataType::Int8, ArrowDataType::LargeUtf8) => {
let (keys, values) = match (key_type, &**value_type) {
(IntegerType::Int8, ArrowDataType::LargeUtf8) => {
let arr = arr.as_any().downcast_ref::<DictionaryArray<i8>>().unwrap();
let keys = arr.keys();
let keys = cast(keys, &ArrowDataType::UInt32)
Expand All @@ -245,7 +250,7 @@ impl std::convert::TryFrom<(&str, Vec<ArrayRef>)> for Series {
let values = values.as_any().downcast_ref::<LargeStringArray>().unwrap();
(keys, values.clone())
}
(ArrowDataType::Int16, ArrowDataType::LargeUtf8) => {
(IntegerType::Int16, ArrowDataType::LargeUtf8) => {
let arr = arr.as_any().downcast_ref::<DictionaryArray<i16>>().unwrap();
let keys = arr.keys();
let keys = cast(keys, &ArrowDataType::UInt32)
Expand All @@ -258,7 +263,7 @@ impl std::convert::TryFrom<(&str, Vec<ArrayRef>)> for Series {
let values = values.as_any().downcast_ref::<LargeStringArray>().unwrap();
(keys, values.clone())
}
(ArrowDataType::Int32, ArrowDataType::LargeUtf8) => {
(IntegerType::Int32, ArrowDataType::LargeUtf8) => {
let arr = arr.as_any().downcast_ref::<DictionaryArray<i32>>().unwrap();
let keys = arr.keys();
let keys = cast(keys, &ArrowDataType::UInt32)
Expand All @@ -271,14 +276,14 @@ impl std::convert::TryFrom<(&str, Vec<ArrayRef>)> for Series {
let values = values.as_any().downcast_ref::<LargeStringArray>().unwrap();
(keys, values.clone())
}
(ArrowDataType::UInt32, ArrowDataType::LargeUtf8) => {
(IntegerType::UInt32, ArrowDataType::LargeUtf8) => {
let arr = arr.as_any().downcast_ref::<DictionaryArray<u32>>().unwrap();
let keys = arr.keys();
let values = arr.values();
let values = values.as_any().downcast_ref::<LargeStringArray>().unwrap();
(keys.clone(), values.clone())
}
(ArrowDataType::Int8, ArrowDataType::Utf8) => {
(IntegerType::Int8, ArrowDataType::Utf8) => {
let arr = arr.as_any().downcast_ref::<DictionaryArray<i8>>().unwrap();
let keys = arr.keys();
let keys = cast(keys, &ArrowDataType::UInt32)
Expand All @@ -297,7 +302,7 @@ impl std::convert::TryFrom<(&str, Vec<ArrayRef>)> for Series {
.clone();
(keys, values)
}
(ArrowDataType::Int16, ArrowDataType::Utf8) => {
(IntegerType::Int16, ArrowDataType::Utf8) => {
let arr = arr.as_any().downcast_ref::<DictionaryArray<i16>>().unwrap();
let keys = arr.keys();
let keys = cast(keys, &ArrowDataType::UInt32)
Expand All @@ -316,7 +321,7 @@ impl std::convert::TryFrom<(&str, Vec<ArrayRef>)> for Series {
.clone();
(keys, values)
}
(ArrowDataType::Int32, ArrowDataType::Utf8) => {
(IntegerType::Int32, ArrowDataType::Utf8) => {
let arr = arr.as_any().downcast_ref::<DictionaryArray<i32>>().unwrap();
let keys = arr.keys();
let keys = cast(keys, &ArrowDataType::UInt32)
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ temporal = ["polars-core/dtype-date", "polars-core/dtype-datetime"]
private = []

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "f146097350512aa28a2bf54181c49361b97a8053", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "6e9ea352b470556e08600274ba2add6106cf6e26", default-features = false }
#arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", default-features = false, branch="dev"}
#arrow = { package = "arrow2", version="0.7", default-features=false }
polars-core = {version = "0.17.0", path = "../polars-core", features = ["private"], default-features=false}
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ad92f1e

Please sign in to comment.