Skip to content

Commit

Permalink
fix bug in pyarrow list roundtrip
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 5, 2021
1 parent d29d7b6 commit 813ce3b
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 4 deletions.
29 changes: 25 additions & 4 deletions polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,9 +506,11 @@ impl DataType {
Date32 => ArrowDataType::Date32,
Date64 => ArrowDataType::Date64,
Time64(tu) => ArrowDataType::Time64(tu.clone()),
List(dt) => {
ArrowDataType::List(Box::new(arrow::datatypes::Field::new("", dt.clone(), true)))
}
List(dt) => ArrowDataType::LargeList(Box::new(arrow::datatypes::Field::new(
"",
dt.clone(),
true,
))),
Duration(tu) => ArrowDataType::Duration(tu.clone()),
Null => ArrowDataType::Null,
#[cfg(feature = "object")]
Expand Down Expand Up @@ -635,7 +637,26 @@ impl Schema {
}

/// Converts this schema into an `ArrowSchema`.
///
/// Every field is converted with its own `to_arrow()`, except `DataType::List` fields:
/// those are emitted as nullable `ArrowDataType::LargeList` fields whose inner field is
/// named "item" and is itself nullable.
pub fn to_arrow(&self) -> ArrowSchema {
// NOTE(review): this binding is shadowed by the `let fields` that follows; it looks like
// the pre-change line of the diff shown next to its replacement -- confirm which is intended.
let fields = self.fields.iter().map(|f| f.to_arrow()).collect();
let fields = self
.fields
.iter()
.map(|f| {
match f.data_type() {
// The inner field must be named "item": per the original author's note, the arrow
// crate uses that name when it creates a schema from record batches (presumably so
// that the two schemas match -- TODO confirm).
DataType::List(dt) => ArrowField::new(
f.name(),
ArrowDataType::LargeList(Box::new(ArrowField::new(
"item",
dt.clone(),
true,
))),
true,
),
// All non-list fields use the field's regular conversion.
_ => f.to_arrow(),
}
})
.collect();
ArrowSchema::new(fields)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime

import pandas as pd
import pyarrow as pa

import polars as pl

Expand All @@ -25,3 +26,8 @@ def test_from_pandas_datetime():

# checks lazy dispatch
pl.DataFrame([s.rename("foo")])[pl.col("foo").dt.round("hour", 2)]


def test_arrow_list_roundtrip():
    """Round-trip an Arrow table with a list column through polars.

    The test passes as long as neither conversion raises; it makes no
    assertion about the resulting table.
    """
    # https://github.com/pola-rs/polars/issues/1064
    arrow_table = pa.table({"a": [1], "b": [[1, 2]]})
    frame = pl.from_arrow(arrow_table)
    frame.to_arrow()

0 comments on commit 813ce3b

Please sign in to comment.