cleanup and lint default features
ritchie46 committed Apr 16, 2021
1 parent eedfd15 commit ef686fa
Showing 10 changed files with 137 additions and 132 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build-test.yaml
@@ -28,6 +28,7 @@ jobs:
-p polars-io \
-p polars-lazy \
-- -D warnings
cargo clippy
- name: Feature test
run: |
cd polars && cargo hack check --each-feature --no-dev-deps
2 changes: 0 additions & 2 deletions polars/Cargo.toml
@@ -18,7 +18,6 @@ default = ["docs",
"temporal",
"performant",
"plain_fmt",
"mimalloc",
"dtype-slim",
]
ndarray = ["polars-core/ndarray"]
@@ -95,7 +94,6 @@ dtype-u64 = ["polars-core/dtype-u64", "polars-lazy/dtype-u64", "polars-io/dtype-
polars-core = {version = "0.12.0", path = "./polars-core", features= ["docs"], default-features = false}
polars-io = {version = "0.12.0", path = "./polars-io", default-features = false, optional=true}
polars-lazy = {version = "0.12.0", path = "./polars-lazy", default-features = false, optional=true}
mimalloc = { version = "*", default-features = false, optional = true}

[dev-dependencies]
criterion = "0.3"
121 changes: 4 additions & 117 deletions polars/polars-core/src/frame/groupby/mod.rs
@@ -1,10 +1,4 @@
use std::hash::{BuildHasher, Hash, Hasher};
use std::{
fmt::{Debug, Formatter},
ops::Add,
};

use crate::chunked_array::{builder::PrimitiveChunkedBuilder, float::IntegerDecode};
use crate::chunked_array::builder::PrimitiveChunkedBuilder;
use crate::frame::select::Selection;
use crate::prelude::*;
use crate::utils::{accumulate_dataframes_vertical, split_ca, split_df, NoNull};
@@ -16,8 +10,9 @@ use crate::POOL;
use ahash::RandomState;
use hashbrown::{hash_map::RawEntryMut, HashMap};
use itertools::Itertools;
use num::{Num, NumCast, Zero};
use rayon::prelude::*;
use std::fmt::Debug;
use std::hash::{BuildHasher, Hash, Hasher};

pub mod aggregations;
#[cfg(feature = "pivot")]
@@ -486,115 +481,6 @@ impl IntoGroupTuples for ListChunked {}
#[cfg(feature = "object")]
impl<T> IntoGroupTuples for ObjectChunked<T> {}

/// Utility enum used for grouping on multiple columns
#[derive(Copy, Clone, Hash, Eq, PartialEq)]
pub(crate) enum Groupable<'a> {
Boolean(bool),
Utf8(&'a str),
UInt8(u8),
UInt16(u16),
UInt32(u32),
UInt64(u64),
Int8(i8),
Int16(i16),
Int32(i32),
Int64(i64),
// mantissa, exponent, sign.
Float32(u64, i16, i8),
Float64(u64, i16, i8),
}

impl<'a> Debug for Groupable<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use Groupable::*;
match self {
Boolean(v) => write!(f, "{}", v),
Utf8(v) => write!(f, "{}", v),
UInt8(v) => write!(f, "{}", v),
UInt16(v) => write!(f, "{}", v),
UInt32(v) => write!(f, "{}", v),
UInt64(v) => write!(f, "{}", v),
Int8(v) => write!(f, "{}", v),
Int16(v) => write!(f, "{}", v),
Int32(v) => write!(f, "{}", v),
Int64(v) => write!(f, "{}", v),
Float32(m, e, s) => write!(f, "float32 mantissa: {} exponent: {} sign: {}", m, e, s),
Float64(m, e, s) => write!(f, "float64 mantissa: {} exponent: {} sign: {}", m, e, s),
}
}
}

impl From<f64> for Groupable<'_> {
fn from(v: f64) -> Self {
let (m, e, s) = v.integer_decode();
Groupable::Float64(m, e, s)
}
}
impl From<f32> for Groupable<'_> {
fn from(v: f32) -> Self {
let (m, e, s) = v.integer_decode();
Groupable::Float32(m, e, s)
}
}

fn float_to_groupable_iter<'a, T>(
ca: &'a ChunkedArray<T>,
) -> Box<dyn Iterator<Item = Option<Groupable>> + 'a + Send>
where
T: PolarsNumericType,
T::Native: Into<Groupable<'a>>,
{
let iter = ca.into_iter().map(|opt_v| opt_v.map(|v| v.into()));
Box::new(iter)
}

impl<'b> (dyn SeriesTrait + 'b) {
pub(crate) fn as_groupable_iter<'a>(
&'a self,
) -> Result<Box<dyn Iterator<Item = Option<Groupable>> + 'a + Send>> {
macro_rules! as_groupable_iter {
($ca:expr, $variant:ident ) => {{
let bx = Box::new($ca.into_iter().map(|opt_b| opt_b.map(Groupable::$variant)));
Ok(bx)
}};
}

match self.dtype() {
DataType::Boolean => as_groupable_iter!(self.bool().unwrap(), Boolean),
DataType::UInt8 => as_groupable_iter!(self.u8().unwrap(), UInt8),
DataType::UInt16 => as_groupable_iter!(self.u16().unwrap(), UInt16),
DataType::UInt32 => as_groupable_iter!(self.u32().unwrap(), UInt32),
DataType::UInt64 => as_groupable_iter!(self.u64().unwrap(), UInt64),
DataType::Int8 => as_groupable_iter!(self.i8().unwrap(), Int8),
DataType::Int16 => as_groupable_iter!(self.i16().unwrap(), Int16),
DataType::Int32 => as_groupable_iter!(self.i32().unwrap(), Int32),
DataType::Int64 => as_groupable_iter!(self.i64().unwrap(), Int64),
DataType::Date32 => {
as_groupable_iter!(self.date32().unwrap(), Int32)
}
DataType::Date64 => {
as_groupable_iter!(self.date64().unwrap(), Int64)
}
DataType::Time64(TimeUnit::Nanosecond) => {
as_groupable_iter!(self.time64_nanosecond().unwrap(), Int64)
}
DataType::Duration(TimeUnit::Nanosecond) => {
as_groupable_iter!(self.duration_nanosecond().unwrap(), Int64)
}
DataType::Duration(TimeUnit::Millisecond) => {
as_groupable_iter!(self.duration_millisecond().unwrap(), Int64)
}
DataType::Utf8 => as_groupable_iter!(self.utf8().unwrap(), Utf8),
DataType::Float32 => Ok(float_to_groupable_iter(self.f32().unwrap())),
DataType::Float64 => Ok(float_to_groupable_iter(self.f64().unwrap())),
DataType::Categorical => as_groupable_iter!(self.categorical().unwrap(), UInt32),
dt => Err(PolarsError::Other(
format!("Column with dtype {:?} is not groupable", dt).into(),
)),
}
}
}

impl DataFrame {
pub fn groupby_with_series(&self, by: Vec<Series>, multithreaded: bool) -> Result<GroupBy> {
if by.is_empty() || by[0].len() != self.height() {
@@ -722,6 +608,7 @@ pub struct GroupBy<'df, 'selection_str> {
}

impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
#[cfg(feature = "downsample")]
fn new(
df: &'df DataFrame,
by: Vec<Series>,
119 changes: 118 additions & 1 deletion polars/polars-core/src/frame/groupby/pivot.rs
@@ -1,4 +1,121 @@
use super::*;
use super::GroupBy;
use crate::chunked_array::float::IntegerDecode;
use crate::prelude::*;
use hashbrown::HashMap;
use itertools::Itertools;
use num::{Num, NumCast, Zero};
use std::collections::hash_map::RandomState;
use std::fmt::{Debug, Formatter};
use std::ops::Add;

/// Utility enum used for grouping on multiple columns
#[derive(Copy, Clone, Hash, Eq, PartialEq)]
pub(crate) enum Groupable<'a> {
Boolean(bool),
Utf8(&'a str),
UInt8(u8),
UInt16(u16),
UInt32(u32),
UInt64(u64),
Int8(i8),
Int16(i16),
Int32(i32),
Int64(i64),
// mantissa, exponent, sign.
Float32(u64, i16, i8),
Float64(u64, i16, i8),
}

impl<'a> Debug for Groupable<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use Groupable::*;
match self {
Boolean(v) => write!(f, "{}", v),
Utf8(v) => write!(f, "{}", v),
UInt8(v) => write!(f, "{}", v),
UInt16(v) => write!(f, "{}", v),
UInt32(v) => write!(f, "{}", v),
UInt64(v) => write!(f, "{}", v),
Int8(v) => write!(f, "{}", v),
Int16(v) => write!(f, "{}", v),
Int32(v) => write!(f, "{}", v),
Int64(v) => write!(f, "{}", v),
Float32(m, e, s) => write!(f, "float32 mantissa: {} exponent: {} sign: {}", m, e, s),
Float64(m, e, s) => write!(f, "float64 mantissa: {} exponent: {} sign: {}", m, e, s),
}
}
}

impl From<f64> for Groupable<'_> {
fn from(v: f64) -> Self {
let (m, e, s) = v.integer_decode();
Groupable::Float64(m, e, s)
}
}
impl From<f32> for Groupable<'_> {
fn from(v: f32) -> Self {
let (m, e, s) = v.integer_decode();
Groupable::Float32(m, e, s)
}
}

fn float_to_groupable_iter<'a, T>(
ca: &'a ChunkedArray<T>,
) -> Box<dyn Iterator<Item = Option<Groupable>> + 'a + Send>
where
T: PolarsNumericType,
T::Native: Into<Groupable<'a>>,
{
let iter = ca.into_iter().map(|opt_v| opt_v.map(|v| v.into()));
Box::new(iter)
}

impl<'b> (dyn SeriesTrait + 'b) {
pub(crate) fn as_groupable_iter<'a>(
&'a self,
) -> Result<Box<dyn Iterator<Item = Option<Groupable>> + 'a + Send>> {
macro_rules! as_groupable_iter {
($ca:expr, $variant:ident ) => {{
let bx = Box::new($ca.into_iter().map(|opt_b| opt_b.map(Groupable::$variant)));
Ok(bx)
}};
}

match self.dtype() {
DataType::Boolean => as_groupable_iter!(self.bool().unwrap(), Boolean),
DataType::UInt8 => as_groupable_iter!(self.u8().unwrap(), UInt8),
DataType::UInt16 => as_groupable_iter!(self.u16().unwrap(), UInt16),
DataType::UInt32 => as_groupable_iter!(self.u32().unwrap(), UInt32),
DataType::UInt64 => as_groupable_iter!(self.u64().unwrap(), UInt64),
DataType::Int8 => as_groupable_iter!(self.i8().unwrap(), Int8),
DataType::Int16 => as_groupable_iter!(self.i16().unwrap(), Int16),
DataType::Int32 => as_groupable_iter!(self.i32().unwrap(), Int32),
DataType::Int64 => as_groupable_iter!(self.i64().unwrap(), Int64),
DataType::Date32 => {
as_groupable_iter!(self.date32().unwrap(), Int32)
}
DataType::Date64 => {
as_groupable_iter!(self.date64().unwrap(), Int64)
}
DataType::Time64(TimeUnit::Nanosecond) => {
as_groupable_iter!(self.time64_nanosecond().unwrap(), Int64)
}
DataType::Duration(TimeUnit::Nanosecond) => {
as_groupable_iter!(self.duration_nanosecond().unwrap(), Int64)
}
DataType::Duration(TimeUnit::Millisecond) => {
as_groupable_iter!(self.duration_millisecond().unwrap(), Int64)
}
DataType::Utf8 => as_groupable_iter!(self.utf8().unwrap(), Utf8),
DataType::Float32 => Ok(float_to_groupable_iter(self.f32().unwrap())),
DataType::Float64 => Ok(float_to_groupable_iter(self.f64().unwrap())),
DataType::Categorical => as_groupable_iter!(self.categorical().unwrap(), UInt32),
dt => Err(PolarsError::Other(
format!("Column with dtype {:?} is not groupable", dt).into(),
)),
}
}
}

impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
/// Pivot a column of the current `DataFrame` and perform one of the following aggregations:
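Aside (not part of the diff): the `Float32(u64, i16, i8)` and `Float64(u64, i16, i8)` variants above exist because `f32`/`f64` implement neither `Eq` nor `Hash`, so `Groupable` stores the (mantissa, exponent, sign) triple produced by `integer_decode`, which can be hashed and compared. A minimal standalone sketch of the idea; the `decode_f64` helper and the `HashMap` usage are illustrative, not code from this commit:

```rust
use std::collections::HashMap;

/// Decompose an f64 into (mantissa, exponent, sign), mirroring what
/// `integer_decode` provides for the `Groupable` float variants.
fn decode_f64(v: f64) -> (u64, i16, i8) {
    let bits = v.to_bits();
    let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
    let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
    let mantissa = if exponent == 0 {
        // subnormal numbers: no implicit leading bit
        (bits & 0x000f_ffff_ffff_ffff) << 1
    } else {
        (bits & 0x000f_ffff_ffff_ffff) | 0x0010_0000_0000_0000
    };
    exponent -= 1075;
    (mantissa, exponent, sign)
}

fn main() {
    // The decoded triple is Hash + Eq, so float values can serve as group keys.
    let mut groups: HashMap<(u64, i16, i8), Vec<usize>> = HashMap::new();
    for (idx, v) in [1.5_f64, 2.0, 1.5, 3.25].iter().enumerate() {
        groups.entry(decode_f64(*v)).or_default().push(idx);
    }
    assert_eq!(groups[&decode_f64(1.5)], vec![0, 2]);
}
```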
7 changes: 5 additions & 2 deletions polars/polars-io/src/lib.rs
@@ -22,8 +22,6 @@ use arrow::{
record_batch::RecordBatch,
};
use polars_core::prelude::*;
use polars_core::utils::accumulate_dataframes_vertical;
use std::convert::TryFrom;
use std::io::{Read, Seek, Write};
use std::sync::Arc;

@@ -83,13 +81,17 @@ impl<R: Read> ArrowReader for ArrowJsonReader<R> {
}
}

#[cfg(any(feature = "ipc", feature = "parquet", feature = "json"))]
pub(crate) fn finish_reader<R: ArrowReader>(
mut reader: R,
rechunk: bool,
stop_after_n_rows: Option<usize>,
predicate: Option<Arc<dyn PhysicalIoExpr>>,
aggregate: Option<&[ScanAggregation]>,
) -> Result<DataFrame> {
use polars_core::utils::accumulate_dataframes_vertical;
use std::convert::TryFrom;

let mut n_rows = 0;
let mut parsed_dfs = Vec::with_capacity(1024);

@@ -164,6 +166,7 @@ pub enum ScanAggregation {

impl ScanAggregation {
/// Evaluate the aggregations per batch.
#[cfg(any(feature = "ipc", feature = "parquet", feature = "json"))]
pub(crate) fn evaluate_batch(&self, df: &DataFrame) -> Result<Series> {
use ScanAggregation::*;
let s = match self {
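Aside (not part of the diff): the `#[cfg(any(feature = "ipc", feature = "parquet", feature = "json"))]` attributes added above keep `finish_reader` and `ScanAggregation::evaluate_batch`, plus the imports they need, out of builds where none of those readers are enabled, so `cargo clippy -- -D warnings` with trimmed default features does not fail on dead code or unused imports. A minimal sketch of the pattern; the feature names and the toy function are illustrative and would need matching `[features]` entries in Cargo.toml:

```rust
// Cargo.toml of this toy crate would declare (illustrative):
//   [features]
//   ipc = []
//   parquet = []

/// Gated the same way `finish_reader` is gated in polars-io: the function only
/// exists when at least one reader feature is enabled, so it cannot trigger
/// dead-code warnings in a build without those features.
#[cfg(any(feature = "ipc", feature = "parquet"))]
fn finish_reader(rows: &[u32]) -> usize {
    // Imports needed only by the gated code live inside the function, so they
    // cannot become "unused import" warnings when the features are off.
    use std::collections::HashSet;

    let distinct: HashSet<&u32> = rows.iter().collect();
    distinct.len()
}

fn main() {
    #[cfg(any(feature = "ipc", feature = "parquet"))]
    println!("distinct rows = {}", finish_reader(&[1, 2, 2, 3]));

    #[cfg(not(any(feature = "ipc", feature = "parquet")))]
    println!("built without reader features; nothing to read");
}
```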
3 changes: 2 additions & 1 deletion polars/polars-lazy/src/logical_plan/mod.rs
@@ -12,7 +12,8 @@ use itertools::Itertools;
use polars_core::frame::hash_join::JoinType;
use polars_core::prelude::*;
use polars_io::csv_core::utils::infer_file_schema;
use polars_io::prelude::*;
#[cfg(feature = "parquet")]
use polars_io::{parquet::ParquetReader, SerReader};
use std::collections::HashSet;
use std::{
cell::Cell,
7 changes: 0 additions & 7 deletions polars/src/lib.rs
@@ -302,10 +302,3 @@ pub use polars_core::df;
pub use polars_io as io;
#[cfg(feature = "lazy")]
pub use polars_lazy as lazy;

#[cfg(feature = "mimalloc")]
use mimalloc::MiMalloc;

#[cfg(feature = "mimalloc")]
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
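Aside (not part of the diff): with the `mimalloc` feature and its `#[global_allocator]` removed from the `polars` library defaults above, the allocator choice moves to the final binary; Rust permits only one global allocator per program, so a library registering one by default constrains every downstream crate. The py-polars change below now sets it unconditionally in the Python extension, and a plain Rust binary can still opt in itself. A minimal sketch, assuming `mimalloc` is added as a direct dependency of the binary:

```rust
use mimalloc::MiMalloc;

// Exactly one #[global_allocator] may exist in the final program, which is why
// this now lives in the leaf crate (py-polars) rather than in the library.
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;

fn main() {
    // Every allocation in this program, including those made inside
    // dependencies such as polars, now goes through mimalloc.
    let v: Vec<u64> = (0..1_000u64).collect();
    println!("sum = {}", v.iter().sum::<u64>());
}
```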
2 changes: 1 addition & 1 deletion py-polars/Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
@@ -21,6 +21,7 @@ libc = "0.2"
thiserror = "1.0.20"
numpy = "0.13.0"
ndarray = "0.14.0"
mimalloc = { version = "*", default-features = false}

[dependencies.polars]
path = "../polars"
@@ -35,7 +36,6 @@ features = [
"ipc",
"csv-file",
"pretty_fmt",
"mimalloc",
"performant",
"dtype-full",
"pivot",
5 changes: 5 additions & 0 deletions py-polars/src/lib.rs
@@ -27,6 +27,11 @@ pub mod prelude;
pub mod series;
pub mod utils;

use mimalloc::MiMalloc;

#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;

#[pyfunction]
fn col(name: &str) -> dsl::PyExpr {
dsl::col(name)
