Allowed non-vec in DF expressions. (#1477)

pola-rs · Oct 2, 2021 · 898d99e · 898d99e
1 parent 2a5d08f
commit 898d99e
Show file tree

Hide file tree

Showing 21 changed files with 199 additions and 231 deletions.
diff --git a/polars/benches/groupby.rs b/polars/benches/groupby.rs
@@ -29,8 +29,8 @@ fn q1(c: &mut Criterion) {
         b.iter(|| {
             DATA.clone()
                 .lazy()
-                .groupby(vec![col("id1")])
-                .agg(vec![col("v1").sum()])
+                .groupby([col("id1")])
+                .agg([col("v1").sum()])
                 .collect()
                 .unwrap();
         })
@@ -42,8 +42,8 @@ fn q2(c: &mut Criterion) {
         b.iter(|| {
             DATA.clone()
                 .lazy()
-                .groupby(vec![col("id1"), col("id2")])
-                .agg(vec![col("v1").sum()])
+                .groupby([col("id1"), col("id2")])
+                .agg([col("v1").sum()])
                 .collect()
                 .unwrap();
         })
@@ -55,8 +55,8 @@ fn q3(c: &mut Criterion) {
         b.iter(|| {
             DATA.clone()
                 .lazy()
-                .groupby(vec![col("id3")])
-                .agg(vec![col("v1").sum(), col("v3").mean()])
+                .groupby([col("id3")])
+                .agg([col("v1").sum(), col("v3").mean()])
                 .collect()
                 .unwrap();
         })
@@ -68,8 +68,8 @@ fn q4(c: &mut Criterion) {
         b.iter(|| {
             DATA.clone()
                 .lazy()
-                .groupby(vec![col("id4")])
-                .agg(vec![col("v1").mean(), col("v2").mean(), col("v3").mean()])
+                .groupby([col("id4")])
+                .agg([col("v1").mean(), col("v2").mean(), col("v3").mean()])
                 .collect()
                 .unwrap();
         })
@@ -81,8 +81,8 @@ fn q5(c: &mut Criterion) {
         b.iter(|| {
             DATA.clone()
                 .lazy()
-                .groupby(vec![col("id6")])
-                .agg(vec![col("v1").sum(), col("v2").sum(), col("v3").sum()])
+                .groupby([col("id6")])
+                .agg([col("v1").sum(), col("v2").sum(), col("v3").sum()])
                 .collect()
                 .unwrap();
         })
@@ -94,8 +94,8 @@ fn q6(c: &mut Criterion) {
         b.iter(|| {
             DATA.clone()
                 .lazy()
-                .groupby(vec![col("id4"), col("id5")])
-                .agg(vec![
+                .groupby([col("id4"), col("id5")])
+                .agg([
                     col("v3").median().alias("v3_median"),
                     col("v3").std().alias("v3_std"),
                 ])
@@ -110,15 +110,9 @@ fn q7(c: &mut Criterion) {
         b.iter(|| {
             DATA.clone()
                 .lazy()
-                .groupby(vec![col("id3")])
-                .agg(vec![
-                    col("v1").max().alias("v1"),
-                    col("v2").min().alias("v2"),
-                ])
-                .select(vec![
-                    col("id3"),
-                    (col("v1") - col("v2")).alias("range_v1_v2"),
-                ])
+                .groupby([col("id3")])
+                .agg([col("v1").max().alias("v1"), col("v2").min().alias("v2")])
+                .select([col("id3"), (col("v1") - col("v2")).alias("range_v1_v2")])
                 .collect()
                 .unwrap();
         })
@@ -133,8 +127,8 @@ fn q8(c: &mut Criterion) {
                 // todo! accept slice of str
                 .drop_nulls(Some(vec![col("v3")]))
                 .sort("v3", true)
-                .groupby(vec![col("id6")])
-                .agg(vec![col("v3").head(Some(2)).alias("v3_top_2")])
+                .groupby([col("id6")])
+                .agg([col("v3").head(Some(2)).alias("v3_top_2")])
                 .explode(&[col("v3_top_2")])
                 .collect()
                 .unwrap();
@@ -148,10 +142,8 @@ fn q9(c: &mut Criterion) {
             DATA.clone()
                 .lazy()
                 .drop_nulls(Some(vec![col("v1"), col("v2")]))
-                .groupby(vec![col("id2"), col("id4")])
-                .agg(vec![pearson_corr(col("v1"), col("v2"))
-                    .alias("r2")
-                    .pow(2.0)])
+                .groupby([col("id2"), col("id4")])
+                .agg([pearson_corr(col("v1"), col("v2")).alias("r2").pow(2.0)])
                 .collect()
                 .unwrap();
         })
@@ -163,18 +155,15 @@ fn q10(c: &mut Criterion) {
         b.iter(|| {
             DATA.clone()
                 .lazy()
-                .groupby(vec![
+                .groupby([
                     col("id1"),
                     col("id2"),
                     col("id3"),
                     col("id4"),
                     col("id5"),
                     col("id6"),
                 ])
-                .agg(vec![
-                    col("v3").sum().alias("v3"),
-                    col("v1").count().alias("v1"),
-                ])
+                .agg([col("v3").sum().alias("v3"), col("v1").count().alias("v1")])
                 .collect()
                 .unwrap();
         })

diff --git a/polars/polars-core/src/frame/groupby/mod.rs b/polars/polars-core/src/frame/groupby/mod.rs
@@ -1488,7 +1488,7 @@ mod test {
             vec![1, 2, 3, 4, 4, 4, 2, 1, 1],
             vec![1, 2, 3, 4, 4, 4],
         ] {
-            let ca = UInt32Chunked::new_from_slice("", &slice);
+            let ca = UInt32Chunked::new_from_slice("", slice);
             let split = split_ca(&ca, 4).unwrap();
 
             let a = groupby(ca.into_iter()).into_iter().sorted().collect_vec();

diff --git a/polars/polars-core/src/frame/hash_join/mod.rs b/polars/polars-core/src/frame/hash_join/mod.rs
@@ -1470,7 +1470,7 @@ mod test {
     #[test]
     #[cfg_attr(miri, ignore)]
     fn test_join_multiple_columns() {
-        let (df_a, df_b) = get_dfs();
+        let (mut df_a, mut df_b) = get_dfs();
 
         // First do a hack with concatenated string dummy column
         let mut s = df_a
@@ -1483,7 +1483,6 @@ mod test {
             + df_a.column("b").unwrap().utf8().unwrap();
         s.rename("dummy");
 
-        let mut df_a = df_a.clone();
         df_a.with_column(s).unwrap();
         let mut s = df_b
             .column("foo")
@@ -1494,7 +1493,6 @@ mod test {
             .unwrap()
             + df_b.column("bar").unwrap().utf8().unwrap();
         s.rename("dummy");
-        let mut df_b = df_b.clone();
         df_b.with_column(s).unwrap();
 
         let joined = df_a.left_join(&df_b, "dummy", "dummy").unwrap();

diff --git a/polars/polars-core/src/frame/select.rs b/polars/polars-core/src/frame/select.rs
@@ -3,7 +3,7 @@
 ///
 /// &str => df.select("my-column"),
 /// (&str)" => df.select(("col_1", "col_2")),
-/// Vec<&str)" => df.select(vec!["col_a", "col_b"]),
+/// Vec<&str)" => df.select(["col_a", "col_b"]),
 pub trait Selection<'a, S> {
     fn to_selection_vec(self) -> Vec<&'a str>;
 

diff --git a/polars/polars-core/src/functions.rs b/polars/polars-core/src/functions.rs
@@ -140,8 +140,8 @@ mod test {
     fn test_pearson_corr() {
         let a = Series::new("a", &[1.0f32, 2.0]);
         let b = Series::new("b", &[1.0f32, 2.0]);
-        assert!((cov(&a.f32().unwrap(), &b.f32().unwrap()).unwrap() - 0.5).abs() < 0.001);
-        assert!((pearson_corr(&a.f32().unwrap(), &b.f32().unwrap()).unwrap() - 1.0).abs() < 0.001);
+        assert!((cov(a.f32().unwrap(), b.f32().unwrap()).unwrap() - 0.5).abs() < 0.001);
+        assert!((pearson_corr(a.f32().unwrap(), b.f32().unwrap()).unwrap() - 1.0).abs() < 0.001);
     }
 
     #[test]

diff --git a/polars/polars-io/src/csv.rs b/polars/polars-io/src/csv.rs
@@ -878,7 +878,7 @@ id090,id048,id0000067778,24,2,51862,4,9,
             .with_columns(Some(
                 schema
                     .fields()
-                    .into_iter()
+                    .iter()
                     .map(|s| s.name().to_string())
                     .collect(),
             ))

diff --git a/polars/polars-lazy/src/datafusion/mod.rs b/polars/polars-lazy/src/datafusion/mod.rs
@@ -46,9 +46,9 @@ mod test {
 
         let out = df
             .lazy()
-            .groupby(vec![col("a")])
-            .agg(vec![col("b").mean()])
-            .select(vec![col("a"), col("b_mean")])
+            .groupby([col("a")])
+            .agg([col("b").mean()])
+            .select([col("a"), col("b_mean")])
             .sort("a", false)
             .ooc()?;
 

diff --git a/polars/polars-lazy/src/dsl.rs b/polars/polars-lazy/src/dsl.rs
@@ -1063,7 +1063,7 @@ impl Expr {
     ///      .lazy()
     ///      .select(&[
     ///          col("groups"),
-    ///          sum("values").over(vec![col("groups")]),
+    ///          sum("values").over([col("groups")]),
     ///      ])
     ///      .collect()?;
     ///     dbg!(&out);
@@ -1101,10 +1101,10 @@ impl Expr {
     /// │ 1      ┆ 16     │
     /// ╰────────┴────────╯
     /// ```
-    pub fn over(self, partition_by: Vec<Expr>) -> Self {
+    pub fn over<E: AsRef<[Expr]>>(self, partition_by: E) -> Self {
         Expr::Window {
             function: Box::new(self),
-            partition_by,
+            partition_by: partition_by.as_ref().to_vec(),
             order_by: None,
             options: WindowOptions { explode: false },
         }
@@ -1353,7 +1353,7 @@ impl Expr {
     /// use polars_lazy::prelude::*;
     ///
     /// fn example(df: LazyFrame) -> LazyFrame {
-    ///     df.select(vec![
+    ///     df.select([
     /// // even though the alias yields a different column name,
     /// // `keep_name` will make sure that the original column name is used
     ///         col("*").alias("foo").keep_name()

diff --git a/polars/polars-lazy/src/frame.rs b/polars/polars-lazy/src/frame.rs
@@ -631,11 +631,10 @@ impl LazyFrame {
     /// use polars_lazy::prelude::*;
     ///
     /// fn example(df: DataFrame) -> Result<DataFrame> {
-    ///       df.lazy()
-    ///         .groupby(vec![col("foo")])
-    ///         .agg(vec!(col("bar").sum(),
-    ///                   col("ham").mean().alias("avg_ham")))
-    ///         .collect()
+    ///     df.lazy()
+    ///       .groupby([col("foo")])
+    ///       .agg([col("bar").sum(), col("ham").mean().alias("avg_ham")])
+    ///       .collect()
     /// }
     /// ```
     pub fn collect(self) -> Result<DataFrame> {
@@ -735,32 +734,32 @@ impl LazyFrame {
     ///
     /// fn example(df: DataFrame) -> LazyFrame {
     ///       df.lazy()
-    ///        .groupby(vec![col("date")])
-    ///        .agg(vec![
+    ///        .groupby([col("date")])
+    ///        .agg([
     ///            col("rain").min(),
     ///            col("rain").sum(),
     ///            col("rain").quantile(0.5).alias("median_rain"),
     ///        ])
     ///        .sort("date", false)
     /// }
     /// ```
-    pub fn groupby(self, by: Vec<Expr>) -> LazyGroupBy {
+    pub fn groupby<E: AsRef<[Expr]>>(self, by: E) -> LazyGroupBy {
         let opt_state = self.get_opt_state();
         LazyGroupBy {
             logical_plan: self.logical_plan,
             opt_state,
-            keys: by,
+            keys: by.as_ref().to_vec(),
             maintain_order: false,
         }
     }
 
     /// Similar to groupby, but order of the DataFrame is maintained.
-    pub fn stable_groupby(self, by: Vec<Expr>) -> LazyGroupBy {
+    pub fn stable_groupby<E: AsRef<[Expr]>>(self, by: E) -> LazyGroupBy {
         let opt_state = self.get_opt_state();
         LazyGroupBy {
             logical_plan: self.logical_plan,
             opt_state,
-            keys: by,
+            keys: by.as_ref().to_vec(),
             maintain_order: true,
         }
     }
@@ -1056,16 +1055,16 @@ impl LazyGroupBy {
     ///
     /// fn example(df: DataFrame) -> LazyFrame {
     ///       df.lazy()
-    ///        .groupby(vec![col("date")])
-    ///        .agg(vec![
+    ///        .groupby([col("date")])
+    ///        .agg([
     ///            col("rain").min(),
     ///            col("rain").sum(),
     ///            col("rain").quantile(0.5).alias("median_rain"),
     ///        ])
     ///        .sort("date", false)
     /// }
     /// ```
-    pub fn agg(self, aggs: Vec<Expr>) -> LazyFrame {
+    pub fn agg<E: AsRef<[Expr]>>(self, aggs: E) -> LazyFrame {
         let lp = LogicalPlanBuilder::from(self.logical_plan)
             .groupby(Arc::new(self.keys), aggs, None, self.maintain_order)
             .build();
@@ -1081,7 +1080,7 @@ impl LazyGroupBy {
             .flatten()
             .collect::<Vec<_>>();
 
-        self.agg(vec![col("*").exclude(&keys).head(n).list().keep_name()])
+        self.agg([col("*").exclude(&keys).head(n).list().keep_name()])
             .explode(vec![col("*").exclude(&keys)])
     }
 
@@ -1094,7 +1093,7 @@ impl LazyGroupBy {
             .flatten()
             .collect::<Vec<_>>();
 
-        self.agg(vec![col("*").exclude(&keys).tail(n).list().keep_name()])
+        self.agg([col("*").exclude(&keys).tail(n).list().keep_name()])
             .explode(vec![col("*").exclude(&keys)])
     }
 

diff --git a/polars/polars-lazy/src/functions.rs b/polars/polars-lazy/src/functions.rs
@@ -65,14 +65,14 @@ pub fn pearson_corr(a: Expr, b: Expr) -> Expr {
 /// That means that the first `Series` will be used to determine the ordering
 /// until duplicates are found. Once duplicates are found, the next `Series` will
 /// be used and so on.
-pub fn argsort_by(by: Vec<Expr>, reverse: &[bool]) -> Expr {
+pub fn argsort_by<E: AsRef<[Expr]>>(by: E, reverse: &[bool]) -> Expr {
     let reverse = reverse.to_vec();
     let function = NoEq::new(Arc::new(move |by: &mut [Series]| {
         polars_core::functions::argsort_by(by, &reverse).map(|ca| ca.into_series())
     }) as Arc<dyn SeriesUdf>);
 
     Expr::Function {
-        input: by,
+        input: by.as_ref().to_vec(),
         function,
         output_type: GetOutput::from_type(DataType::UInt32),
         options: FunctionOptions {

diff --git a/polars/polars-lazy/src/lib.rs b/polars/polars-lazy/src/lib.rs
@@ -101,8 +101,8 @@
 //!     )?;
 //!
 //!     df.lazy()
-//!     .groupby(vec![col("date")])
-//!     .agg(vec![
+//!     .groupby([col("date")])
+//!     .agg([
 //!         col("rain").min(),
 //!         col("rain").sum(),
 //!         col("rain").quantile(0.5).alias("median_rain"),
@@ -162,7 +162,7 @@
 //!     .filter(
 //!         col("a").lt(lit(2))
 //!     )
-//!     .groupby(vec![col("b")])
+//!     .groupby([col("b")])
 //!     .agg(
 //!         vec![col("b").first(), col("c").first()]
 //!      )
@@ -178,7 +178,7 @@
 //!
 //! fn aggregate_all_columns(df_a: DataFrame) -> LazyFrame {
 //!     df_a.lazy()
-//!     .groupby(vec![col("b")])
+//!     .groupby([col("b")])
 //!     .agg(
 //!         vec![col("*").first()]
 //!      )

diff --git a/polars/polars-lazy/src/logical_plan/iterator.rs b/polars/polars-lazy/src/logical_plan/iterator.rs
@@ -320,8 +320,8 @@ mod test {
         let (root, lp_arena, _expr_arena) = df
             .lazy()
             .sort("a", false)
-            .groupby(vec![col("a")])
-            .agg(vec![col("a").first()])
+            .groupby([col("a")])
+            .agg([col("a").first()])
             .logical_plan
             .into_alp();