Skip to content

Commit

Permalink
cleanup more old code
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 18, 2022
1 parent 498f8b3 commit b368fce
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 154 deletions.
173 changes: 45 additions & 128 deletions polars/polars-core/src/frame/groupby/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,8 @@ impl DataFrame {
/// ```
/// use polars_core::prelude::*;
/// fn groupby_sum(df: &DataFrame) -> Result<DataFrame> {
/// df.groupby("column_name")?
/// .select("agg_column_name")
/// df.groupby(["column_name"])?
/// .select(["agg_column_name"])
/// .sum()
/// }
/// ```
Expand Down Expand Up @@ -480,9 +480,9 @@ impl DataFrame {
/// let s0 = DateChunked::parse_from_str_slice("date", dates, fmt)
/// .into_series();
/// // create temperature series
/// let s1 = Series::new("temp", [20, 10, 7, 9, 1].as_ref());
/// let s1 = Series::new("temp", [20, 10, 7, 9, 1]);
/// // create rain series
/// let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01].as_ref());
/// let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01]);
/// // create a new DataFrame
/// let df = DataFrame::new(vec![s0, s1, s2]).unwrap();
/// println!("{:?}", df);
Expand Down Expand Up @@ -606,7 +606,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select(&["temp", "rain"]).mean()
/// df.groupby(["date"])?.select(&["temp", "rain"]).mean()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -645,7 +645,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select("temp").sum()
/// df.groupby(["date"])?.select(["temp"]).sum()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -684,7 +684,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select("temp").min()
/// df.groupby(["date"])?.select(["temp"]).min()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -722,7 +722,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select("temp").max()
/// df.groupby(["date"])?.select(["temp"]).max()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -760,7 +760,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select("temp").first()
/// df.groupby(["date"])?.select(["temp"]).first()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -796,7 +796,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select("temp").last()
/// df.groupby(["date"])?.select(["temp"]).last()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -832,7 +832,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select("temp").n_unique()
/// df.groupby(["date"])?.select(["temp"]).n_unique()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -872,7 +872,7 @@ impl<'df> GroupBy<'df> {
/// # use polars_arrow::prelude::QuantileInterpolOptions;
///
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select("temp").quantile(0.2, QuantileInterpolOptions::default())
/// df.groupby(["date"])?.select(["temp"]).quantile(0.2, QuantileInterpolOptions::default())
/// }
/// ```
pub fn quantile(&self, quantile: f64, interpol: QuantileInterpolOptions) -> Result<DataFrame> {
Expand Down Expand Up @@ -901,7 +901,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select("temp").median()
/// df.groupby(["date"])?.select(["temp"]).median()
/// }
/// ```
pub fn median(&self) -> Result<DataFrame> {
Expand Down Expand Up @@ -952,7 +952,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.select("temp").count()
/// df.groupby(["date"])?.select(["temp"]).count()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -992,7 +992,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.groups()
/// df.groupby(["date"])?.groups()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -1045,7 +1045,7 @@ impl<'df> GroupBy<'df> {
/// ```rust
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// df.groupby("date")?.agg(&[("temp", &["n_unique", "sum", "min"])])
/// df.groupby(["date"])?.agg(&[("temp", &["n_unique", "sum", "min"])])
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -1141,7 +1141,7 @@ impl<'df> GroupBy<'df> {
/// # use polars_core::prelude::*;
/// fn example(df: DataFrame) -> Result<DataFrame> {
/// // GroupBy and aggregate to Lists
/// df.groupby("date")?.select("temp").agg_list()
/// df.groupby(["date"])?.select(["temp"]).agg_list()
/// }
/// ```
/// Returns:
Expand Down Expand Up @@ -1278,7 +1278,7 @@ mod test {
#[test]
#[cfg(feature = "dtype-date")]
#[cfg_attr(miri, ignore)]
fn test_group_by() {
fn test_group_by() -> Result<()> {
let s0 = DateChunked::parse_from_str_slice(
"date",
&[
Expand All @@ -1291,121 +1291,44 @@ mod test {
"%Y-%m-%d",
)
.into_series();
let s1 = Series::new("temp", [20, 10, 7, 9, 1].as_ref());
let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01].as_ref());
let s1 = Series::new("temp", [20, 10, 7, 9, 1]);
let s2 = Series::new("rain", [0.2, 0.1, 0.3, 0.1, 0.01]);
let df = DataFrame::new(vec![s0, s1, s2]).unwrap();

println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.count()
.unwrap()
let out = df.groupby_stable(["date"])?.select(["temp"]).count()?;
assert_eq!(
out.column("temp_count")?,
&Series::new("temp_count", [2u32, 2, 1])
);

// Select multiple
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(&["temp", "rain"])
.mean()
.unwrap()
let out = df
.groupby_stable(["date"])?
.select(&["temp", "rain"])
.mean()?;
assert_eq!(
out.column("temp_mean")?,
&Series::new("temp_mean", [15.0f64, 4.0, 9.0])
);

// Group by multiple
println!(
"multiple keys {:?}",
df.groupby(&["date", "temp"])
.unwrap()
.select(["rain"])
.mean()
.unwrap()
);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.sum()
.unwrap()
);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.min()
.unwrap()
);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.max()
.unwrap()
);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.agg_list()
.unwrap()
);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.first()
.unwrap()
);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.last()
.unwrap()
);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.n_unique()
.unwrap()
);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.quantile(0.2, QuantileInterpolOptions::default())
.unwrap()
);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.select(["temp"])
.median()
.unwrap()
let out = df
.groupby_stable(&["date", "temp"])?
.select(["rain"])
.mean()?;
assert!(out.column("rain_mean").is_ok());

let out = df.groupby_stable(["date"])?.select(["temp"]).sum()?;
assert_eq!(
out.column("temp_sum")?,
&Series::new("temp_sum", [30, 8, 9])
);

// implicit select all and only aggregate on methods that support that aggregation
let gb = df.groupby(["date"]).unwrap().n_unique().unwrap();
println!("{:?}", df.groupby(["date"]).unwrap().n_unique().unwrap());
// check the group by column is filtered out.
assert_eq!(gb.width(), 3);
println!(
"{:?}",
df.groupby(["date"])
.unwrap()
.agg(&[("temp", &["n_unique", "sum", "min"])])
.unwrap()
);
println!("{:?}", df.groupby(["date"]).unwrap().groups().unwrap());
Ok(())
}

#[test]
Expand All @@ -1428,7 +1351,6 @@ mod test {

let df =
DataFrame::new(vec![s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12]).unwrap();
println!("{:?}", df);

let adf = df
.groupby(&[
Expand All @@ -1439,8 +1361,6 @@ mod test {
.sum()
.unwrap();

println!("{:?}", adf);

assert_eq!(
Vec::from(&adf.column("N_sum").unwrap().i32().unwrap().sort(false)),
&[Some(1), Some(2), Some(2), Some(6)]
Expand Down Expand Up @@ -1473,7 +1393,6 @@ mod test {

// Create the DataFrame with the computed series.
let df = DataFrame::new(series).unwrap();
println!("{:?}", df);

// Compute the aggregated DataFrame by the 13 columns defined in `series_names`.
let adf = df
Expand All @@ -1482,7 +1401,6 @@ mod test {
.select(["N"])
.sum()
.unwrap();
println!("{:?}", adf);

// Check that the results of the group-by are correct. The content of every column
// is equal, then, the grouped columns shall be equal and in the same order.
Expand Down Expand Up @@ -1636,7 +1554,6 @@ mod test {
df.try_apply("g", |s| s.cast(&DataType::Categorical))?;

let out = df.groupby(["g"])?.sum()?;
dbg!(out);
Ok(())
}
}

0 comments on commit b368fce

Please sign in to comment.