cleanup and lint default features
ritchie46 committed Apr 16, 2021
1 parent eedfd15 commit ef686fa
Showing 10 changed files with 137 additions and 132 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build-test.yaml
@@ -28,6 +28,7 @@ jobs:
-p polars-io \
-p polars-lazy \
-- -D warnings
cargo clippy
- name: Feature test
run: |
cd polars && cargo hack check --each-feature --no-dev-deps
2 changes: 0 additions & 2 deletions polars/Cargo.toml
@@ -18,7 +18,6 @@ default = ["docs",
"temporal",
"performant",
"plain_fmt",
"mimalloc",
"dtype-slim",
]
ndarray = ["polars-core/ndarray"]
@@ -95,7 +94,6 @@ dtype-u64 = ["polars-core/dtype-u64", "polars-lazy/dtype-u64", "polars-io/dtype-
polars-core = {version = "0.12.0", path = "./polars-core", features= ["docs"], default-features = false}
polars-io = {version = "0.12.0", path = "./polars-io", default-features = false, optional=true}
polars-lazy = {version = "0.12.0", path = "./polars-lazy", default-features = false, optional=true}
mimalloc = { version = "*", default-features = false, optional = true}

[dev-dependencies]
criterion = "0.3"
121 changes: 4 additions & 117 deletions polars/polars-core/src/frame/groupby/mod.rs
@@ -1,10 +1,4 @@
use std::hash::{BuildHasher, Hash, Hasher};
use std::{
fmt::{Debug, Formatter},
ops::Add,
};

use crate::chunked_array::{builder::PrimitiveChunkedBuilder, float::IntegerDecode};
use crate::chunked_array::builder::PrimitiveChunkedBuilder;
use crate::frame::select::Selection;
use crate::prelude::*;
use crate::utils::{accumulate_dataframes_vertical, split_ca, split_df, NoNull};
@@ -16,8 +10,9 @@ use crate::POOL;
use ahash::RandomState;
use hashbrown::{hash_map::RawEntryMut, HashMap};
use itertools::Itertools;
use num::{Num, NumCast, Zero};
use rayon::prelude::*;
use std::fmt::Debug;
use std::hash::{BuildHasher, Hash, Hasher};

pub mod aggregations;
#[cfg(feature = "pivot")]
@@ -486,115 +481,6 @@ impl IntoGroupTuples for ListChunked {}
#[cfg(feature = "object")]
impl<T> IntoGroupTuples for ObjectChunked<T> {}

/// Utility enum used for grouping on multiple columns
#[derive(Copy, Clone, Hash, Eq, PartialEq)]
pub(crate) enum Groupable<'a> {
Boolean(bool),
Utf8(&'a str),
UInt8(u8),
UInt16(u16),
UInt32(u32),
UInt64(u64),
Int8(i8),
Int16(i16),
Int32(i32),
Int64(i64),
// mantissa, exponent, sign.
Float32(u64, i16, i8),
Float64(u64, i16, i8),
}

impl<'a> Debug for Groupable<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use Groupable::*;
match self {
Boolean(v) => write!(f, "{}", v),
Utf8(v) => write!(f, "{}", v),
UInt8(v) => write!(f, "{}", v),
UInt16(v) => write!(f, "{}", v),
UInt32(v) => write!(f, "{}", v),
UInt64(v) => write!(f, "{}", v),
Int8(v) => write!(f, "{}", v),
Int16(v) => write!(f, "{}", v),
Int32(v) => write!(f, "{}", v),
Int64(v) => write!(f, "{}", v),
Float32(m, e, s) => write!(f, "float32 mantissa: {} exponent: {} sign: {}", m, e, s),
Float64(m, e, s) => write!(f, "float64 mantissa: {} exponent: {} sign: {}", m, e, s),
}
}
}

impl From<f64> for Groupable<'_> {
fn from(v: f64) -> Self {
let (m, e, s) = v.integer_decode();
Groupable::Float64(m, e, s)
}
}
impl From<f32> for Groupable<'_> {
fn from(v: f32) -> Self {
let (m, e, s) = v.integer_decode();
Groupable::Float32(m, e, s)
}
}

fn float_to_groupable_iter<'a, T>(
ca: &'a ChunkedArray<T>,
) -> Box<dyn Iterator<Item = Option<Groupable>> + 'a + Send>
where
T: PolarsNumericType,
T::Native: Into<Groupable<'a>>,
{
let iter = ca.into_iter().map(|opt_v| opt_v.map(|v| v.into()));
Box::new(iter)
}

impl<'b> (dyn SeriesTrait + 'b) {
pub(crate) fn as_groupable_iter<'a>(
&'a self,
) -> Result<Box<dyn Iterator<Item = Option<Groupable>> + 'a + Send>> {
macro_rules! as_groupable_iter {
($ca:expr, $variant:ident ) => {{
let bx = Box::new($ca.into_iter().map(|opt_b| opt_b.map(Groupable::$variant)));
Ok(bx)
}};
}

match self.dtype() {
DataType::Boolean => as_groupable_iter!(self.bool().unwrap(), Boolean),
DataType::UInt8 => as_groupable_iter!(self.u8().unwrap(), UInt8),
DataType::UInt16 => as_groupable_iter!(self.u16().unwrap(), UInt16),
DataType::UInt32 => as_groupable_iter!(self.u32().unwrap(), UInt32),
DataType::UInt64 => as_groupable_iter!(self.u64().unwrap(), UInt64),
DataType::Int8 => as_groupable_iter!(self.i8().unwrap(), Int8),
DataType::Int16 => as_groupable_iter!(self.i16().unwrap(), Int16),
DataType::Int32 => as_groupable_iter!(self.i32().unwrap(), Int32),
DataType::Int64 => as_groupable_iter!(self.i64().unwrap(), Int64),
DataType::Date32 => {
as_groupable_iter!(self.date32().unwrap(), Int32)
}
DataType::Date64 => {
as_groupable_iter!(self.date64().unwrap(), Int64)
}
DataType::Time64(TimeUnit::Nanosecond) => {
as_groupable_iter!(self.time64_nanosecond().unwrap(), Int64)
}
DataType::Duration(TimeUnit::Nanosecond) => {
as_groupable_iter!(self.duration_nanosecond().unwrap(), Int64)
}
DataType::Duration(TimeUnit::Millisecond) => {
as_groupable_iter!(self.duration_millisecond().unwrap(), Int64)
}
DataType::Utf8 => as_groupable_iter!(self.utf8().unwrap(), Utf8),
DataType::Float32 => Ok(float_to_groupable_iter(self.f32().unwrap())),
DataType::Float64 => Ok(float_to_groupable_iter(self.f64().unwrap())),
DataType::Categorical => as_groupable_iter!(self.categorical().unwrap(), UInt32),
dt => Err(PolarsError::Other(
format!("Column with dtype {:?} is not groupable", dt).into(),
)),
}
}
}

impl DataFrame {
pub fn groupby_with_series(&self, by: Vec<Series>, multithreaded: bool) -> Result<GroupBy> {
if by.is_empty() || by[0].len() != self.height() {
@@ -722,6 +608,7 @@ pub struct GroupBy<'df, 'selection_str> {
}

impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
#[cfg(feature = "downsample")]
fn new(
df: &'df DataFrame,
by: Vec<Series>,
119 changes: 118 additions & 1 deletion polars/polars-core/src/frame/groupby/pivot.rs
@@ -1,4 +1,121 @@
use super::*;
use super::GroupBy;
use crate::chunked_array::float::IntegerDecode;
use crate::prelude::*;
use hashbrown::HashMap;
use itertools::Itertools;
use num::{Num, NumCast, Zero};
use std::collections::hash_map::RandomState;
use std::fmt::{Debug, Formatter};
use std::ops::Add;

/// Utility enum used for grouping on multiple columns
#[derive(Copy, Clone, Hash, Eq, PartialEq)]
pub(crate) enum Groupable<'a> {
Boolean(bool),
Utf8(&'a str),
UInt8(u8),
UInt16(u16),
UInt32(u32),
UInt64(u64),
Int8(i8),
Int16(i16),
Int32(i32),
Int64(i64),
// mantissa, exponent, sign.
Float32(u64, i16, i8),
Float64(u64, i16, i8),
}

impl<'a> Debug for Groupable<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
use Groupable::*;
match self {
Boolean(v) => write!(f, "{}", v),
Utf8(v) => write!(f, "{}", v),
UInt8(v) => write!(f, "{}", v),
UInt16(v) => write!(f, "{}", v),
UInt32(v) => write!(f, "{}", v),
UInt64(v) => write!(f, "{}", v),
Int8(v) => write!(f, "{}", v),
Int16(v) => write!(f, "{}", v),
Int32(v) => write!(f, "{}", v),
Int64(v) => write!(f, "{}", v),
Float32(m, e, s) => write!(f, "float32 mantissa: {} exponent: {} sign: {}", m, e, s),
Float64(m, e, s) => write!(f, "float64 mantissa: {} exponent: {} sign: {}", m, e, s),
}
}
}

impl From<f64> for Groupable<'_> {
fn from(v: f64) -> Self {
let (m, e, s) = v.integer_decode();
Groupable::Float64(m, e, s)
}
}
impl From<f32> for Groupable<'_> {
fn from(v: f32) -> Self {
let (m, e, s) = v.integer_decode();
Groupable::Float32(m, e, s)
}
}

fn float_to_groupable_iter<'a, T>(
ca: &'a ChunkedArray<T>,
) -> Box<dyn Iterator<Item = Option<Groupable>> + 'a + Send>
where
T: PolarsNumericType,
T::Native: Into<Groupable<'a>>,
{
let iter = ca.into_iter().map(|opt_v| opt_v.map(|v| v.into()));
Box::new(iter)
}

impl<'b> (dyn SeriesTrait + 'b) {
pub(crate) fn as_groupable_iter<'a>(
&'a self,
) -> Result<Box<dyn Iterator<Item = Option<Groupable>> + 'a + Send>> {
macro_rules! as_groupable_iter {
($ca:expr, $variant:ident ) => {{
let bx = Box::new($ca.into_iter().map(|opt_b| opt_b.map(Groupable::$variant)));
Ok(bx)
}};
}

match self.dtype() {
DataType::Boolean => as_groupable_iter!(self.bool().unwrap(), Boolean),
DataType::UInt8 => as_groupable_iter!(self.u8().unwrap(), UInt8),
DataType::UInt16 => as_groupable_iter!(self.u16().unwrap(), UInt16),
DataType::UInt32 => as_groupable_iter!(self.u32().unwrap(), UInt32),
DataType::UInt64 => as_groupable_iter!(self.u64().unwrap(), UInt64),
DataType::Int8 => as_groupable_iter!(self.i8().unwrap(), Int8),
DataType::Int16 => as_groupable_iter!(self.i16().unwrap(), Int16),
DataType::Int32 => as_groupable_iter!(self.i32().unwrap(), Int32),
DataType::Int64 => as_groupable_iter!(self.i64().unwrap(), Int64),
DataType::Date32 => {
as_groupable_iter!(self.date32().unwrap(), Int32)
}
DataType::Date64 => {
as_groupable_iter!(self.date64().unwrap(), Int64)
}
DataType::Time64(TimeUnit::Nanosecond) => {
as_groupable_iter!(self.time64_nanosecond().unwrap(), Int64)
}
DataType::Duration(TimeUnit::Nanosecond) => {
as_groupable_iter!(self.duration_nanosecond().unwrap(), Int64)
}
DataType::Duration(TimeUnit::Millisecond) => {
as_groupable_iter!(self.duration_millisecond().unwrap(), Int64)
}
DataType::Utf8 => as_groupable_iter!(self.utf8().unwrap(), Utf8),
DataType::Float32 => Ok(float_to_groupable_iter(self.f32().unwrap())),
DataType::Float64 => Ok(float_to_groupable_iter(self.f64().unwrap())),
DataType::Categorical => as_groupable_iter!(self.categorical().unwrap(), UInt32),
dt => Err(PolarsError::Other(
format!("Column with dtype {:?} is not groupable", dt).into(),
)),
}
}
}

impl<'df, 'selection_str> GroupBy<'df, 'selection_str> {
/// Pivot a column of the current `DataFrame` and perform one of the following aggregations:
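Aside (not part of the diff): the `Float32(u64, i16, i8)` and `Float64(u64, i16, i8)` variants above exist because `f32`/`f64` implement neither `Eq` nor `Hash`, so `Groupable` stores the (mantissa, exponent, sign) triple produced by `integer_decode`, which can be hashed and compared. A minimal standalone sketch of the idea; the `decode_f64` helper and the `HashMap` usage are illustrative, not code from this commit:

```rust
use std::collections::HashMap;

/// Decompose an f64 into (mantissa, exponent, sign), mirroring what
/// `integer_decode` provides for the `Groupable` float variants.
fn decode_f64(v: f64) -> (u64, i16, i8) {
    let bits = v.to_bits();
    let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
    let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
    let mantissa = if exponent == 0 {
        // subnormal numbers: no implicit leading bit
        (bits & 0x000f_ffff_ffff_ffff) << 1
    } else {
        (bits & 0x000f_ffff_ffff_ffff) | 0x0010_0000_0000_0000
    };
    exponent -= 1075;
    (mantissa, exponent, sign)
}

fn main() {
    // The decoded triple is Hash + Eq, so float values can serve as group keys.
    let mut groups: HashMap<(u64, i16, i8), Vec<usize>> = HashMap::new();
    for (idx, v) in [1.5_f64, 2.0, 1.5, 3.25].iter().enumerate() {
        groups.entry(decode_f64(*v)).or_default().push(idx);
    }
    assert_eq!(groups[&decode_f64(1.5)], vec![0, 2]);
}
```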
7 changes: 5 additions & 2 deletions polars/polars-io/src/lib.rs
@@ -22,8 +22,6 @@ use arrow::{
record_batch::RecordBatch,
};
use polars_core::prelude::*;
use polars_core::utils::accumulate_dataframes_vertical;
use std::convert::TryFrom;
use std::io::{Read, Seek, Write};
use std::sync::Arc;

@@ -83,13 +81,17 @@ impl<R: Read> ArrowReader for ArrowJsonReader<R> {
}
}

#[cfg(any(feature = "ipc", feature = "parquet", feature = "json"))]
pub(crate) fn finish_reader<R: ArrowReader>(
mut reader: R,
rechunk: bool,
stop_after_n_rows: Option<usize>,
predicate: Option<Arc<dyn PhysicalIoExpr>>,
aggregate: Option<&[ScanAggregation]>,
) -> Result<DataFrame> {
use polars_core::utils::accumulate_dataframes_vertical;
use std::convert::TryFrom;

let mut n_rows = 0;
let mut parsed_dfs = Vec::with_capacity(1024);

@@ -164,6 +166,7 @@ pub enum ScanAggregation {

impl ScanAggregation {
/// Evaluate the aggregations per batch.
#[cfg(any(feature = "ipc", feature = "parquet", feature = "json"))]
pub(crate) fn evaluate_batch(&self, df: &DataFrame) -> Result<Series> {
use ScanAggregation::*;
let s = match self {
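Aside (not part of the diff): the `#[cfg(any(feature = "ipc", feature = "parquet", feature = "json"))]` attributes added above keep `finish_reader` and `ScanAggregation::evaluate_batch`, plus the imports they need, out of builds where none of those readers are enabled, so `cargo clippy -- -D warnings` with trimmed default features does not fail on dead code or unused imports. A minimal sketch of the pattern; the feature names and the toy function are illustrative and would need matching `[features]` entries in Cargo.toml:

```rust
// Cargo.toml of this toy crate would declare (illustrative):
//   [features]
//   ipc = []
//   parquet = []

/// Gated the same way `finish_reader` is gated in polars-io: the function only
/// exists when at least one reader feature is enabled, so it cannot trigger
/// dead-code warnings in a build without those features.
#[cfg(any(feature = "ipc", feature = "parquet"))]
fn finish_reader(rows: &[u32]) -> usize {
    // Imports needed only by the gated code live inside the function, so they
    // cannot become "unused import" warnings when the features are off.
    use std::collections::HashSet;

    let distinct: HashSet<&u32> = rows.iter().collect();
    distinct.len()
}

fn main() {
    #[cfg(any(feature = "ipc", feature = "parquet"))]
    println!("distinct rows = {}", finish_reader(&[1, 2, 2, 3]));

    #[cfg(not(any(feature = "ipc", feature = "parquet")))]
    println!("built without reader features; nothing to read");
}
```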
3 changes: 2 additions & 1 deletion polars/polars-lazy/src/logical_plan/mod.rs
@@ -12,7 +12,8 @@ use itertools::Itertools;
use polars_core::frame::hash_join::JoinType;
use polars_core::prelude::*;
use polars_io::csv_core::utils::infer_file_schema;
use polars_io::prelude::*;
#[cfg(feature = "parquet")]
use polars_io::{parquet::ParquetReader, SerReader};
use std::collections::HashSet;
use std::{
cell::Cell,
7 changes: 0 additions & 7 deletions polars/src/lib.rs
@@ -302,10 +302,3 @@ pub use polars_core::df;
pub use polars_io as io;
#[cfg(feature = "lazy")]
pub use polars_lazy as lazy;

#[cfg(feature = "mimalloc")]
use mimalloc::MiMalloc;

#[cfg(feature = "mimalloc")]
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
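Aside (not part of the diff): with the `mimalloc` feature and its `#[global_allocator]` removed from the `polars` library defaults above, the allocator choice moves to the final binary; Rust permits only one global allocator per program, so a library registering one by default constrains every downstream crate. The py-polars change below now sets it unconditionally in the Python extension, and a plain Rust binary can still opt in itself. A minimal sketch, assuming `mimalloc` is added as a direct dependency of the binary:

```rust
use mimalloc::MiMalloc;

// Exactly one #[global_allocator] may exist in the final program, which is why
// this now lives in the leaf crate (py-polars) rather than in the library.
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;

fn main() {
    // Every allocation in this program, including those made inside
    // dependencies such as polars, now goes through mimalloc.
    let v: Vec<u64> = (0..1_000u64).collect();
    println!("sum = {}", v.iter().sum::<u64>());
}
```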
2 changes: 1 addition & 1 deletion py-polars/Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion py-polars/Cargo.toml
@@ -21,6 +21,7 @@ libc = "0.2"
thiserror = "1.0.20"
numpy = "0.13.0"
ndarray = "0.14.0"
mimalloc = { version = "*", default-features = false}

[dependencies.polars]
path = "../polars"
@@ -35,7 +36,6 @@ features = [
"ipc",
"csv-file",
"pretty_fmt",
"mimalloc",
"performant",
"dtype-full",
"pivot",
5 changes: 5 additions & 0 deletions py-polars/src/lib.rs
@@ -27,6 +27,11 @@ pub mod prelude;
pub mod series;
pub mod utils;

use mimalloc::MiMalloc;

#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;

#[pyfunction]
fn col(name: &str) -> dsl::PyExpr {
dsl::col(name)
