Skip to content

Commit

Permalink
rename distinct to unique (#2926)
Browse files Browse the repository at this point in the history
* rename distinct to unique

* docs
  • Loading branch information
ritchie46 committed Mar 18, 2022
1 parent 04f3dfa commit 6b26890
Show file tree
Hide file tree
Showing 11 changed files with 57 additions and 35 deletions.
24 changes: 10 additions & 14 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub enum NullStrategy {
}

#[derive(Copy, Clone, Debug)]
pub enum DistinctKeepStrategy {
pub enum UniqueKeepStrategy {
First,
Last,
}
Expand Down Expand Up @@ -2701,8 +2701,8 @@ impl DataFrame {
#[deprecated(note = "use distinct")]
pub fn drop_duplicates(&self, maintain_order: bool, subset: Option<&[String]>) -> Result<Self> {
match maintain_order {
true => self.distinct_stable(subset, DistinctKeepStrategy::First),
false => self.distinct(subset, DistinctKeepStrategy::First),
true => self.unique_stable(subset, UniqueKeepStrategy::First),
false => self.unique(subset, UniqueKeepStrategy::First),
}
}

Expand All @@ -2721,7 +2721,7 @@ impl DataFrame {
/// "str" => ["a", "a", "b", "b", "c", "c"]
/// }?;
///
/// println!("{}", df.distinct_stable(None, DistinctKeepStrategy::First)?);
/// println!("{}", df.unique_stable(None, UniqueKeepStrategy::First)?);
/// # Ok::<(), PolarsError>(())
/// ```
/// Returns
Expand All @@ -2739,30 +2739,26 @@ impl DataFrame {
/// | 3 | 3 | "c" |
/// +-----+-----+-----+
/// ```
pub fn distinct_stable(
pub fn unique_stable(
&self,
subset: Option<&[String]>,
keep: DistinctKeepStrategy,
keep: UniqueKeepStrategy,
) -> Result<DataFrame> {
self.distinct_impl(true, subset, keep)
}

/// Unstable unique. See [`DataFrame::unique_stable`].
pub fn distinct(
&self,
subset: Option<&[String]>,
keep: DistinctKeepStrategy,
) -> Result<DataFrame> {
pub fn unique(&self, subset: Option<&[String]>, keep: UniqueKeepStrategy) -> Result<DataFrame> {
self.distinct_impl(false, subset, keep)
}

fn distinct_impl(
&self,
maintain_order: bool,
subset: Option<&[String]>,
keep: DistinctKeepStrategy,
keep: UniqueKeepStrategy,
) -> Result<Self> {
use DistinctKeepStrategy::*;
use UniqueKeepStrategy::*;
let names = match &subset {
Some(s) => s.iter().map(|s| &**s).collect(),
None => self.get_column_names(),
Expand Down Expand Up @@ -3113,7 +3109,7 @@ mod test {
.unwrap();
dbg!(&df);
let df = df
.distinct_stable(None, DistinctKeepStrategy::First)
.unique_stable(None, UniqueKeepStrategy::First)
.unwrap()
.sort(["flt"], false)
.unwrap();
Expand Down
8 changes: 4 additions & 4 deletions polars/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -939,10 +939,10 @@ impl LazyFrame {
}

/// Keep unique rows and maintain order
pub fn distinct_stable(
pub fn unique_stable(
self,
subset: Option<Vec<String>>,
keep_strategy: DistinctKeepStrategy,
keep_strategy: UniqueKeepStrategy,
) -> LazyFrame {
let opt_state = self.get_opt_state();
let options = DistinctOptions {
Expand All @@ -955,10 +955,10 @@ impl LazyFrame {
}

/// Keep unique rows, do not maintain order
pub fn distinct(
pub fn unique(
self,
subset: Option<Vec<String>>,
keep_strategy: DistinctKeepStrategy,
keep_strategy: UniqueKeepStrategy,
) -> LazyFrame {
let opt_state = self.get_opt_state();
let options = DistinctOptions {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/src/logical_plan/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ pub struct GroupbyOptions {
pub struct DistinctOptions {
pub(crate) subset: Option<Arc<Vec<String>>>,
pub(crate) maintain_order: bool,
pub(crate) keep_strategy: DistinctKeepStrategy,
pub(crate) keep_strategy: UniqueKeepStrategy,
}

#[derive(Copy, Clone, Debug, PartialEq)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ impl Executor for DropDuplicatesExec {
let keep = self.options.keep_strategy;

match self.options.maintain_order {
true => df.distinct_stable(subset, keep),
false => df.distinct(subset, keep),
true => df.unique_stable(subset, keep),
false => df.unique(subset, keep),
}
}
}
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ Manipulation/ selection
DataFrame.rows
DataFrame.to_dummies
DataFrame.distinct
DataFrame.unique
DataFrame.shrink_to_fit
DataFrame.rechunk
DataFrame.pipe
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/lazyframe.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ Manipulation/ selection
LazyFrame.fill_nan
LazyFrame.explode
LazyFrame.distinct
LazyFrame.unique
LazyFrame.drop_nulls
LazyFrame.sort
LazyFrame.melt
Expand Down
14 changes: 13 additions & 1 deletion py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4631,6 +4631,18 @@ def distinct(
maintain_order: bool = True,
subset: Optional[Union[str, List[str]]] = None,
keep: str = "first",
) -> DF:
"""
.. deprecated:: 0.13.13
Please use `unique`
"""
return self.unique(maintain_order, subset, keep)

def unique(
self: DF,
maintain_order: bool = True,
subset: Optional[Union[str, List[str]]] = None,
keep: str = "first",
) -> DF:
"""
Drop duplicate rows from this DataFrame.
Expand All @@ -4651,7 +4663,7 @@ def distinct(
"""
if subset is not None and not isinstance(subset, list):
subset = [subset]
return self._from_pydf(self._df.distinct(maintain_order, subset, keep))
return self._from_pydf(self._df.unique(maintain_order, subset, keep))

def rechunk(self: DF) -> DF:
"""
Expand Down
14 changes: 13 additions & 1 deletion py-polars/polars/internals/lazy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1723,6 +1723,18 @@ def distinct(
maintain_order: bool = True,
subset: Optional[Union[str, List[str]]] = None,
keep: str = "first",
) -> LDF:
"""
.. deprecated:: 0.13.13
Please use `unique`
"""
return self.unique(maintain_order, subset, keep)

def unique(
self: LDF,
maintain_order: bool = True,
subset: Optional[Union[str, List[str]]] = None,
keep: str = "first",
) -> LDF:
"""
Drop duplicate rows from this LazyFrame.
Expand All @@ -1743,7 +1755,7 @@ def distinct(
"""
if subset is not None and not isinstance(subset, list):
subset = [subset]
return self._from_pyldf(self._ldf.distinct(maintain_order, subset, keep))
return self._from_pyldf(self._ldf.unique(maintain_order, subset, keep))

def drop_nulls(self: LDF, subset: Optional[Union[List[str], str]] = None) -> LDF:
"""
Expand Down
6 changes: 3 additions & 3 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,11 @@ impl<'a> FromPyObject<'a> for Wrap<PivotAgg> {
}
}

impl<'a> FromPyObject<'a> for Wrap<DistinctKeepStrategy> {
impl<'a> FromPyObject<'a> for Wrap<UniqueKeepStrategy> {
fn extract(ob: &'a PyAny) -> PyResult<Self> {
match ob.extract::<&str>()? {
"first" => Ok(Wrap(DistinctKeepStrategy::First)),
"last" => Ok(Wrap(DistinctKeepStrategy::Last)),
"first" => Ok(Wrap(UniqueKeepStrategy::First)),
"last" => Ok(Wrap(UniqueKeepStrategy::Last)),
s => panic!("keep strategy {} is not supported", s),
}
}
Expand Down
8 changes: 4 additions & 4 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1043,18 +1043,18 @@ impl PyDataFrame {
self.df.shift(periods).into()
}

pub fn distinct(
pub fn unique(
&self,
py: Python,
maintain_order: bool,
subset: Option<Vec<String>>,
keep: Wrap<DistinctKeepStrategy>,
keep: Wrap<UniqueKeepStrategy>,
) -> PyResult<Self> {
let df = py.allow_threads(|| {
let subset = subset.as_ref().map(|v| v.as_ref());
match maintain_order {
true => self.df.distinct_stable(subset, keep.0),
false => self.df.distinct(subset, keep.0),
true => self.df.unique_stable(subset, keep.0),
false => self.df.unique(subset, keep.0),
}
.map_err(PyPolarsErr::from)
})?;
Expand Down
10 changes: 5 additions & 5 deletions py-polars/src/lazy/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use polars::lazy::frame::{AllowedOptimizations, LazyCsvReader, LazyFrame, LazyGr
use polars::lazy::prelude::col;
use polars::prelude::{ClosedWindow, CsvEncoding, DataFrame, Field, JoinType, Schema};
use polars::time::*;
use polars_core::frame::DistinctKeepStrategy;
use polars_core::frame::UniqueKeepStrategy;
use polars_core::prelude::{
AnyValue, AsOfOptions, AsofStrategy, DataType, QuantileInterpolOptions, SortOptions,
};
Expand Down Expand Up @@ -571,16 +571,16 @@ impl PyLazyFrame {
ldf.explode(column).into()
}

pub fn distinct(
pub fn unique(
&self,
maintain_order: bool,
subset: Option<Vec<String>>,
keep: Wrap<DistinctKeepStrategy>,
keep: Wrap<UniqueKeepStrategy>,
) -> Self {
let ldf = self.ldf.clone();
match maintain_order {
true => ldf.distinct_stable(subset, keep.0),
false => ldf.distinct(subset, keep.0),
true => ldf.unique_stable(subset, keep.0),
false => ldf.unique(subset, keep.0),
}
.into()
}
Expand Down

0 comments on commit 6b26890

Please sign in to comment.