Skip to content

Commit

Permalink
ndarray init
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 9, 2020
1 parent 0cf3678 commit 8f3e53c
Show file tree
Hide file tree
Showing 8 changed files with 133 additions and 16 deletions.
8 changes: 4 additions & 4 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ pretty = ["prettytable-rs"]
simd = ["arrow/packed_simd"]
docs = []
temporal = ["chrono"]
parquet_ser = ["parquet"]
random = ["rand", "rand_distr"]
default = ["pretty", "docs", "temporal"]

Expand All @@ -31,6 +30,7 @@ prettytable-rs = { version="^0.8.0", features=["win_crlf"], optional = true, def
crossbeam = "^0.7"
chrono = {version = "^0.4.13", optional = true}
enum_dispatch = "^0.3.2"
parquet = {version = "1.0.1", optional = true}
rand = {version = "0.7.3", optional = true}
rand_distr = {version = "0.3.0", optional = true}
parquet = {version = "1", optional = true}
rand = {version = "0.7", optional = true}
rand_distr = {version = "0.3", optional = true}
ndarray = {version = "0.13", optional = true}
28 changes: 18 additions & 10 deletions polars/src/chunked_array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,18 @@ where
T: PolarsNumericType,
{
type Item = T::Native;
type IntoIter = Copied<Iter<'a, T::Native>>;
type IntoIter = Box<dyn Iterator<Item = Self::Item> + 'a>;

fn into_no_null_iter(self) -> Self::IntoIter {
self.downcast_chunks()[0]
.value_slice(0, self.len())
.into_iter()
.copied()
match self.chunks.len() {
1 => Box::new(
self.downcast_chunks()[0]
.value_slice(0, self.len())
.into_iter()
.copied(),
),
_ => Box::new(NumIterManyChunk::new(self)),
}
}
}

Expand Down Expand Up @@ -138,6 +143,8 @@ where
}

/// Many chunks no null checks
/// Used both as the iterator without null checks and, with `Some` mapped over every item,
/// as the iterator with null checks.
pub struct NumIterManyChunk<'a, T>
where
T: PolarsNumericType,
Expand Down Expand Up @@ -216,7 +223,7 @@ impl<'a, T> Iterator for NumIterManyChunk<'a, T>
where
T: PolarsNumericType,
{
type Item = Option<T::Native>;
type Item = T::Native;

fn next(&mut self) -> Option<Self::Item> {
let opt_val = self.current_iter_left.next();
Expand All @@ -237,7 +244,8 @@ where
opt_val
};
self.idx_left += 1;
opt_val.map(Some)
// opt_val.map(Some)
opt_val
}

fn size_hint(&self) -> (usize, Option<usize>) {
Expand Down Expand Up @@ -272,7 +280,7 @@ where
opt_val
};
self.idx_right -= 1;
opt_val.map(Some)
opt_val
}
}

Expand Down Expand Up @@ -468,7 +476,7 @@ where
match self {
NumericChunkIterDispatch::SingleChunk(a) => a.next(),
NumericChunkIterDispatch::SingleChunkNullCheck(a) => a.next(),
NumericChunkIterDispatch::ManyChunk(a) => a.next(),
NumericChunkIterDispatch::ManyChunk(a) => a.next().map(Some),
NumericChunkIterDispatch::ManyChunkNullCheck(a) => a.next(),
}
}
Expand All @@ -491,7 +499,7 @@ where
match self {
NumericChunkIterDispatch::SingleChunk(a) => a.next_back(),
NumericChunkIterDispatch::SingleChunkNullCheck(a) => a.next_back(),
NumericChunkIterDispatch::ManyChunk(a) => a.next_back(),
NumericChunkIterDispatch::ManyChunk(a) => a.next_back().map(Some),
NumericChunkIterDispatch::ManyChunkNullCheck(a) => a.next_back(),
}
}
Expand Down
3 changes: 3 additions & 0 deletions polars/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ pub mod cast;
pub mod chunkops;
pub mod comparison;
pub mod iterator;
#[cfg(feature = "ndarray")]
#[doc(cfg(feature = "ndarray"))]
mod ndarray;
#[cfg(feature = "random")]
#[doc(cfg(feature = "random"))]
mod random;
Expand Down
58 changes: 58 additions & 0 deletions polars/src/chunked_array/ndarray.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
use crate::prelude::*;
use ndarray::prelude::*;

impl<T> ChunkedArray<T>
where
T: PolarsNumericType,
{
/// If data is aligned in a single chunk and has no Null values a zero copy view is returned
/// as an `ndarray`
pub fn to_ndarray(&self) -> Result<ArrayView1<T::Native>> {
let slice = self.cont_slice()?;
Ok(aview1(slice))
}
}

impl DataFrame {
    /// Create a 2D `ndarray::Array` from this `DataFrame`. This requires all columns in the
    /// `DataFrame` to be non-null and numeric. They will be cast to the same data type
    /// (if they aren't already).
    ///
    /// Every `DataFrame` column becomes one column of the resulting array, so the array has
    /// the shape reported by `DataFrame::shape`.
    ///
    /// # Errors
    ///
    /// * `PolarsError::HasNullValues` if any column contains at least one null value.
    /// * Any error returned while casting a column to `N` or unpacking it as
    ///   `ChunkedArray<N>`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use polars::prelude::*;
    /// let a = UInt32Chunked::new_from_slice("a", &[1, 2, 3]).into_series();
    /// let b = Float64Chunked::new_from_slice("b", &[10., 8., 6.]).into_series();
    ///
    /// let df = DataFrame::new(vec![a, b]).unwrap();
    /// let ndarray = df.to_ndarray::<Float64Type>().unwrap();
    /// println!("{:?}", ndarray);
    /// ```
    /// Outputs:
    /// ```text
    /// [[1.0, 10.0],
    ///  [2.0, 8.0],
    ///  [3.0, 6.0]], shape=[3, 2], strides=[2, 1], layout=C (0x1), const ndim=2
    /// ```
    pub fn to_ndarray<N>(&self) -> Result<Array2<N::Native>>
    where
        N: PolarsNumericType,
        N::Native: num::Zero + Copy,
    {
        let mut ndarr: Array2<N::Native> = Array2::zeros(self.shape());
        for (col_idx, series) in self.get_columns().iter().enumerate() {
            // Null slots have no numeric representation in the output array.
            if series.null_count() != 0 {
                return Err(PolarsError::HasNullValues);
            }
            // this is an Arc clone if already of type N
            let series = series.cast::<N>()?;
            let ca = series.unpack::<N>()?;

            // Fill the destination column through a mutable view instead of indexing
            // `[[row, col]]` for every element.
            ndarr
                .column_mut(col_idx)
                .iter_mut()
                .zip(ca.into_no_null_iter())
                .for_each(|(cell, val)| *cell = val);
        }
        Ok(ndarr)
    }
}
1 change: 1 addition & 0 deletions polars/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ impl DataFrame {
}

/// Get a reference to the DataFrame columns.
///
/// Returns a borrow of the `columns` field; nothing is copied or cloned.
#[inline]
pub fn get_columns(&self) -> &DfColumns {
&self.columns
}
Expand Down
2 changes: 1 addition & 1 deletion polars/src/frame/ser/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//! use std::fs::File;
//!
//! fn example() -> Result<DataFrame> {
//! let r = File::open("some_file.parquet")?;
//! let r = File::open("some_file.parquet").unwrap();
//! let reader = ParquetReader::new(r);
//! reader.finish()
//! }
Expand Down
2 changes: 1 addition & 1 deletion polars/src/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub(crate) fn create_df() -> DataFrame {
let s1 = Series::new("temp", [22.1, 19.9, 7., 2., 3.].as_ref());
DataFrame::new(vec![s0, s1]).unwrap()
}
#[cfg(feature = "parquet_ser")]
#[cfg(feature = "parquet")]
pub use crate::frame::ser::parquet::ParquetReader;

#[macro_export]
Expand Down
47 changes: 47 additions & 0 deletions polars/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,53 @@ impl Series {
Ok(s)
}

/// Get the `ChunkedArray` for some `PolarsDataType`
pub fn unpack<N>(&self) -> Result<&ChunkedArray<N>>
where
N: PolarsDataType,
{
macro_rules! unpack_if_match {
($ca:ident) => {{
if *$ca.dtype() == N::get_data_type() {
unsafe { Ok(mem::transmute::<_, &ChunkedArray<N>>($ca)) }
} else {
Err(PolarsError::DataTypeMisMatch)
}
}};
}
match self {
Series::Bool(arr) => unpack_if_match!(arr),
Series::Utf8(arr) => unpack_if_match!(arr),
Series::UInt8(arr) => unpack_if_match!(arr),
Series::UInt16(arr) => unpack_if_match!(arr),
Series::UInt32(arr) => unpack_if_match!(arr),
Series::UInt64(arr) => unpack_if_match!(arr),
Series::Int8(arr) => unpack_if_match!(arr),
Series::Int16(arr) => unpack_if_match!(arr),
Series::Int32(arr) => unpack_if_match!(arr),
Series::Int64(arr) => unpack_if_match!(arr),
Series::Float32(arr) => unpack_if_match!(arr),
Series::Float64(arr) => unpack_if_match!(arr),
Series::Date32(arr) => unpack_if_match!(arr),
Series::Date64(arr) => unpack_if_match!(arr),
Series::Time32Millisecond(arr) => unpack_if_match!(arr),
Series::Time32Second(arr) => unpack_if_match!(arr),
Series::Time64Nanosecond(arr) => unpack_if_match!(arr),
Series::Time64Microsecond(arr) => unpack_if_match!(arr),
Series::DurationNanosecond(arr) => unpack_if_match!(arr),
Series::DurationMicrosecond(arr) => unpack_if_match!(arr),
Series::DurationMillisecond(arr) => unpack_if_match!(arr),
Series::DurationSecond(arr) => unpack_if_match!(arr),
Series::TimestampNanosecond(arr) => unpack_if_match!(arr),
Series::TimestampMicrosecond(arr) => unpack_if_match!(arr),
Series::TimestampMillisecond(arr) => unpack_if_match!(arr),
Series::TimestampSecond(arr) => unpack_if_match!(arr),
Series::IntervalDayTime(arr) => unpack_if_match!(arr),
Series::IntervalYearMonth(arr) => unpack_if_match!(arr),
Series::LargeList(arr) => unpack_if_match!(arr),
}
}

/// Get a single value by index. Don't use this operation for loops as a runtime cast is
/// needed for every iteration.
pub fn get(&self, index: usize) -> AnyType {
Expand Down

0 comments on commit 8f3e53c

Please sign in to comment.