Skip to content

Commit

Permalink
Compiles!
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored and ritchie46 committed Jun 24, 2021
1 parent 4e26e8a commit 92d5e33
Show file tree
Hide file tree
Showing 28 changed files with 121 additions and 144 deletions.
2 changes: 1 addition & 1 deletion polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ description = "Arrow interfaces for Polars DataFrame library"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "7461b42b3e9e19ef2ff8b52d85e4915ebc44c1fd", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "d5bf08d52294688869e1e594c5cacd99663fd94b", default-features = false }
thiserror = "^1.0"
num = "^0.4"
2 changes: 1 addition & 1 deletion polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ dtype-u64 = []
parquet = ["arrow/io_parquet"]

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "7461b42b3e9e19ef2ff8b52d85e4915ebc44c1fd", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "d5bf08d52294688869e1e594c5cacd99663fd94b", default-features = false }
#arrow = {version = "4.2", default-features = false }
#parquet = {version = "4.2", default-features = false, optional = true }
polars-arrow = {version = "0.14.2", path = "../polars-arrow"}
Expand Down
9 changes: 4 additions & 5 deletions polars/polars-core/src/chunked_array/builder/categorical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ impl RevMappingBuilder {
fn finish(self) -> RevMapping {
use RevMappingBuilder::*;
match self {
Local(mut b) => RevMapping::Local(b.into()),
Global(mut map, mut b, uuid) => {
Local(b) => RevMapping::Local(b.into()),
Global(mut map, b, uuid) => {
map.shrink_to_fit();
RevMapping::Global(map, b.into(), uuid)
}
Expand Down Expand Up @@ -147,11 +147,10 @@ impl CategoricalChunkedBuilder {
}
}

pub fn finish(mut self) -> ChunkedArray<CategoricalType> {
let arr = self.array_builder.into_arc();
pub fn finish(self) -> ChunkedArray<CategoricalType> {
ChunkedArray {
field: Arc::new(self.field),
chunks: vec![arr],
chunks: vec![self.array_builder.into_arc()],
phantom: PhantomData,
categorical_map: Some(Arc::new(self.reverse_mapping.finish())),
}
Expand Down
115 changes: 52 additions & 63 deletions polars/polars-core/src/chunked_array/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl ChunkedBuilder<bool, BooleanType> for BooleanChunkedBuilder {
self.array_builder.push(None);
}

fn finish(mut self) -> BooleanChunked {
fn finish(self) -> BooleanChunked {
let arr: BooleanArray = self.array_builder.into();
let arr = Arc::new(arr) as ArrayRef;

Expand Down Expand Up @@ -90,12 +90,10 @@ where
self.array_builder.push(None)
}

fn finish(mut self) -> ChunkedArray<T> {
let arr = self.array_builder.into_arc();

fn finish(self) -> ChunkedArray<T> {
ChunkedArray {
field: Arc::new(self.field),
chunks: vec![arr],
chunks: vec![self.array_builder.into_arc()],
phantom: PhantomData,
categorical_map: None,
}
Expand Down Expand Up @@ -152,10 +150,10 @@ impl Utf8ChunkedBuilder {

#[inline]
pub fn append_option<S: AsRef<str>>(&mut self, opt: Option<S>) {
self.builder.push(opt.map(|x| x.as_ref()));
self.builder.push(opt);
}

pub fn finish(mut self) -> Utf8Chunked {
pub fn finish(self) -> Utf8Chunked {
let arr = self.builder.into_arc();
ChunkedArray {
field: Arc::new(self.field),
Expand Down Expand Up @@ -345,7 +343,7 @@ where

macro_rules! finish_list_builder {
($self:ident) => {{
let arr = $self.builder.into_arc();
let arr = $self.builder.as_arc();
ListChunked {
field: Arc::new($self.field.clone()),
chunks: vec![arr],
Expand All @@ -360,26 +358,31 @@ where
T: PolarsPrimitiveType,
{
pub fn new(name: &str, capacity: usize, values_capacity: usize) -> Self {
let builder =
LargePrimitiveBuilder::<T::Native>::with_capacities(capacity, values_capacity);
let values = MutablePrimitiveArray::<T::Native>::with_capacity(values_capacity);
let builder = LargePrimitiveBuilder::<T::Native>::new_with_capacity(values, capacity);
let field = Field::new(name, DataType::List(T::get_dtype().to_arrow()));

Self { builder, field }
}

pub fn append_slice(&mut self, opt_v: Option<&[T::Native]>) {
match opt_v {
Some(v) => {
self.builder.push(v);
Some(items) => {
let values = self.builder.mut_values();
values.reserve(items.len());
for item in items {
values.push(Some(*item))
}
self.builder.try_push_valid().unwrap();
}
None => {
self.builder.append(false).expect("should not fail");
self.builder.push_null();
}
}
}

pub fn append_null(&mut self) {
self.builder.append(false).expect("should not fail");
self.builder.push_null();
}
}

Expand All @@ -393,38 +396,35 @@ where
match opt_s {
Some(s) => self.append_series(s),
None => {
self.builder.push(None);
self.builder.push_null();
}
}
}

#[inline]
fn append_null(&mut self) {
self.builder.push(None);
self.builder.push_null();
}

#[inline]
fn append_series(&mut self, s: &Series) {
let builder = self.builder.values();
let arrays = s.chunks();
for a in arrays {
let values = a.get_values::<T>();
// we would like to check if array has no null values.
// however at the time of writing there is a bug in append_slice, because it does not update
// the null bitmap
if s.null_count() == 0 {
builder.append_slice(values);
} else {
values.iter().enumerate().for_each(|(idx, v)| {
if a.is_valid(idx) {
builder.append_value(*v);
} else {
builder.append_null();
}
});
}

let arrays = arrays
.iter()
.map(|x| {
x.as_any()
.downcast_ref::<PrimitiveArray<T::Native>>()
.unwrap()
})
.map(|x| x.iter())
.flatten();
let values = self.builder.mut_values();
values.reserve(s.len());
for v in arrays {
values.push(v.copied())
}
self.builder.append(true).unwrap();
self.builder.try_push_valid().unwrap();
}

fn finish(&mut self) -> ListChunked {
Expand All @@ -443,7 +443,8 @@ pub struct ListUtf8ChunkedBuilder {

impl ListUtf8ChunkedBuilder {
pub fn new(name: &str, capacity: usize, values_capacity: usize) -> Self {
let builder = LargeListUtf8Builder::with_capacities(capacity, values_capacity);
let values = MutableUtf8Array::<i64>::with_capacity(values_capacity);
let builder = LargeListUtf8Builder::new_with_capacity(values, capacity);
let field = Field::new(name, DataType::List(ArrowDataType::LargeUtf8));

ListUtf8ChunkedBuilder { builder, field }
Expand All @@ -455,29 +456,22 @@ impl ListBuilderTrait for ListUtf8ChunkedBuilder {
match opt_s {
Some(s) => self.append_series(s),
None => {
self.builder.append(false).unwrap();
self.builder.push_null();
}
}
}

#[inline]
fn append_null(&mut self) {
let builder = self.builder.values();
builder.append_null().unwrap();
self.builder.append(true).unwrap();
self.builder.push_null();
}

#[inline]
fn append_series(&mut self, s: &Series) {
let ca = s.utf8().unwrap();
let value_builder = self.builder.values();
for s in ca {
match s {
Some(s) => value_builder.append_value(s).unwrap(),
None => value_builder.append_null().unwrap(),
};
}
self.builder.append(true).unwrap();
let value_builder = self.builder.mut_values();
value_builder.try_extend(ca).unwrap();
self.builder.try_push_valid().unwrap();
}

fn finish(&mut self) -> ListChunked {
Expand All @@ -491,8 +485,9 @@ pub struct ListBooleanChunkedBuilder {
}

impl ListBooleanChunkedBuilder {
pub fn new(name: &str, capacity: usize, values_capacacity: usize) -> Self {
let builder = LargeListBooleanBuilder::with_capacities(capacity, values_capacacity);
pub fn new(name: &str, capacity: usize, values_capacity: usize) -> Self {
let values = MutableBooleanArray::with_capacity(values_capacity);
let builder = LargeListBooleanBuilder::new_with_capacity(values, capacity);
let field = Field::new(name, DataType::List(ArrowDataType::Boolean));

Self { builder, field }
Expand All @@ -504,29 +499,22 @@ impl ListBuilderTrait for ListBooleanChunkedBuilder {
match opt_s {
Some(s) => self.append_series(s),
None => {
self.builder.append(false).unwrap();
self.builder.push_null();
}
}
}

#[inline]
fn append_null(&mut self) {
let builder = self.builder.values();
builder.append_null();
self.builder.append(true).unwrap();
self.builder.push_null();
}

#[inline]
fn append_series(&mut self, s: &Series) {
let ca = s.bool().unwrap();
let value_builder = self.builder.values();
for s in ca {
match s {
Some(s) => value_builder.append_value(s),
None => value_builder.append_null(),
};
}
self.builder.append(true).unwrap();
let value_builder = self.builder.mut_values();
value_builder.extend(ca);
self.builder.try_push_valid().unwrap();
}

fn finish(&mut self) -> ListChunked {
Expand All @@ -542,7 +530,8 @@ pub fn get_list_builder(
) -> Box<dyn ListBuilderTrait> {
macro_rules! get_primitive_builder {
($type:ty) => {{
let builder = ListPrimitiveChunkedBuilder::new(&name, value_capacity);
let builder =
ListPrimitiveChunkedBuilder::<$type>::new(&name, list_capacity, value_capacity);
Box::new(builder)
}};
}
Expand Down Expand Up @@ -584,7 +573,7 @@ mod test {

#[test]
fn test_list_builder() {
let mut builder = ListPrimitiveChunkedBuilder::new("a", 10, 5);
let mut builder = ListPrimitiveChunkedBuilder::<Int32Type>::new("a", 10, 5);

// create a series containing two chunks
let mut s1 = Int32Chunked::new_from_slice("a", &[1, 2, 3]).into_series();
Expand Down
41 changes: 28 additions & 13 deletions polars/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,22 @@ macro_rules! cast_from_dtype {
}};
}

/// Casts every chunk of `chunked` from numeric type `T` to numeric type `N`
/// and reassembles the results into a new `ChunkedArray<N>`.
///
/// Each chunk is converted with `cast_physical::<T, N>`, which receives
/// `dtype` by reference. The returned array keeps the name of the source
/// field.
///
/// This function always returns `Ok`; the `Result` wrapper lets call sites
/// chain it with `?`.
// NOTE(review): whether `cast_physical` can panic on out-of-range values is
// not visible from here — confirm against its definition.
fn cast_from_dtype<N, T>(chunked: &ChunkedArray<T>, dtype: DataType) -> Result<ChunkedArray<N>>
where
    N: PolarsNumericType,
    T: PolarsNumericType,
    N::Native: NumCast,
    T::Native: NumCast,
{
    let name = chunked.field.name();

    // `downcast_iter` already yields the typed chunks one by one, so it can be
    // mapped directly.
    let cast_chunks = chunked
        .downcast_iter()
        .map(|chunk| cast_physical::<T, N>(chunk, &dtype))
        .collect();

    Ok(ChunkedArray::new_from_chunks(name, cast_chunks))
}

macro_rules! cast_with_dtype {
($self:expr, $data_type:expr) => {{
use DataType::*;
Expand Down Expand Up @@ -140,26 +156,25 @@ where
// the underlying datatype is i64 so we transmute array
(Duration(_), Int64) => {
cast_from_dtype!(self, cast_logical, Int64)
},
}
// paths not supported by arrow kernel
// to float32
(Duration(_), Float32) | (Date32, Float32) | (Date64, Float32)
(Duration(_), Float32) | (Date32, Float32) | (Date64, Float32) => {
cast_from_dtype::<Float32Type, _>(self, Float32)?.cast::<N>()
}
// to float64
| (Duration(_), Float64) | (Date32, Float64) | (Date64, Float64)
// underlying type: i64
| (Duration(_), UInt64)
=> {
cast_from_dtype!(self, cast_physical, N::get_dtype())
(Duration(_), Float64) | (Date32, Float64) | (Date64, Float64) => {
cast_from_dtype::<Float64Type, _>(self, Float64)?.cast::<N>()
}
// to uint64
(Duration(_), UInt64) => cast_from_dtype::<UInt64Type, _>(self, UInt64)?.cast::<N>(),
// to date64
(Float64, Date64) | (Float32, Date64) => {
let out: Result<Int64Chunked> = cast_from_dtype!(self, cast_physical, Int64);
out?.cast::<N>()
cast_from_dtype::<Date64Type, _>(self, Date64)?.cast::<N>()
}
// to date64
// to date32
(Float64, Date32) | (Float32, Date32) => {
let out: Result<Int32Chunked> = cast_from_dtype!(self, cast_physical, Int32);
out?.cast::<N>()
cast_from_dtype::<Date32Type, _>(self, Date32)?.cast::<N>()
}
_ => cast_ca(self),
};
Expand Down Expand Up @@ -273,7 +288,7 @@ impl ChunkCast for ListChunked {
DataType::List(child_type) => {
let chunks = self
.downcast_iter()
.map(|list| cast_inner_list_type(list, &data_type.to_arrow()))
.map(|list| cast_inner_list_type(list, child_type))
.collect::<Result<_>>()?;
let ca = ListChunked::new_from_chunks(self.name(), chunks);
Ok(ca.into_series())
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/iterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ impl<'a> Iterator for BoolIterNoNull<'a> {
} else {
let old = self.current;
self.current += 1;
unsafe { Some(self.array.value(old)) }
unsafe { Some(self.array.value_unchecked(old)) }
}
}

Expand Down

0 comments on commit 92d5e33

Please sign in to comment.