Skip to content

Commit

Permalink
write categorical to parquet without casting to utf8
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 10, 2021
1 parent d731987 commit d6b26c9
Show file tree
Hide file tree
Showing 5 changed files with 4 additions and 19 deletions.
2 changes: 1 addition & 1 deletion polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ description = "Arrow interfaces for Polars DataFrame library"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "25ddb0d5fc2dfea80f3ac52eba5c26c294743da8", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "eead22fcff1f7ecad9bf48e83616a6a08f487bd4", default-features = false }
#arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch="dev", default-features = false }
#arrow = { package = "arrow2", version = "0.5.3", default-features=false}
thiserror = "^1.0"
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ docs-selection = [
]

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "25ddb0d5fc2dfea80f3ac52eba5c26c294743da8", default-features = false, features=["compute"] }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "eead22fcff1f7ecad9bf48e83616a6a08f487bd4", default-features = false, features=["compute"] }
#arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", default-features = false, features=["compute"], branch="dev" }
#arrow = { package = "arrow2", version="0.5.3", default-features = false, features=["compute"]}
polars-arrow = {version = "0.16.0", path = "../polars-arrow"}
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ temporal = ["polars-core/dtype-date", "polars-core/dtype-datetime"]
private = []

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "25ddb0d5fc2dfea80f3ac52eba5c26c294743da8", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "eead22fcff1f7ecad9bf48e83616a6a08f487bd4", default-features = false }
#arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", default-features = false, branch="dev"}
#arrow = { package = "arrow2", version="0.5.3", default-features=false }
polars-core = {version = "0.16.0", path = "../polars-core", features = ["private"], default-features=false}
Expand Down
15 changes: 0 additions & 15 deletions polars/polars-io/src/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,6 @@ where

/// Write the given DataFrame to the writer `W`.
pub fn finish(mut self, df: &DataFrame) -> Result<()> {
// temporarily coerce categorical to utf8 until https://github.com/jorgecarleitao/parquet2/issues/57 is fixed

let columns = df
.get_columns()
.iter()
.map(|s| {
if let DataType::Categorical = s.dtype() {
s.cast(&DataType::Utf8).unwrap()
} else {
s.clone()
}
})
.collect();
let df = DataFrame::new_no_checks(columns);

let mut fields = df.schema().to_arrow().fields().clone();

// date64 is not supported by parquet and will be truncated to date32
Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit d6b26c9

Please sign in to comment.